From 60fb30e8a2f4511cc3860cf9b0e208ce6687e2ab Mon Sep 17 00:00:00 2001 From: Kenneth Gitere Date: Tue, 20 Apr 2021 21:06:54 +0300 Subject: [PATCH] Add url field in Extractor struct --- src/extractor.rs | 12 +++++++----- src/main.rs | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/extractor.rs b/src/extractor.rs index 64b9a2a..507ff6a 100644 --- a/src/extractor.rs +++ b/src/extractor.rs @@ -15,22 +15,24 @@ pub struct Extractor { article: Option, pub img_urls: Vec, readability: Readability, + pub url: String, } impl Extractor { /// Create a new instance of an HTML extractor given an HTML string - pub fn from_html(html_str: &str) -> Self { + pub fn from_html(html_str: &str, url: &str) -> Self { Extractor { article: None, img_urls: Vec::new(), readability: Readability::new(html_str), + url: url.to_string(), } } /// Locates and extracts the HTML in a document which is determined to be /// the source of the content - pub fn extract_content(&mut self, url: &str) { - self.readability.parse(url); + pub fn extract_content(&mut self) { + self.readability.parse(&self.url); if let Some(article_node_ref) = &self.readability.article_node { let template = r#" @@ -157,8 +159,8 @@ mod test { #[test] fn test_extract_img_urls() { - let mut extractor = Extractor::from_html(TEST_HTML); - extractor.extract_content("http://example.com/"); + let mut extractor = Extractor::from_html(TEST_HTML, "http://example.com/"); + extractor.extract_content(); extractor.extract_img_urls(); assert!(extractor.img_urls.len() > 0); diff --git a/src/main.rs b/src/main.rs index 83884be..d3d9cc2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -44,9 +44,9 @@ fn download(app_config: AppConfig) { match fetch_result { Ok((url, html)) => { // println!("Extracting"); - let mut extractor = Extractor::from_html(&html); + let mut extractor = Extractor::from_html(&html, &url); bar.set_message("Extracting..."); - extractor.extract_content(&url); + extractor.extract_content(); if extractor.article().is_some() { extractor.extract_img_urls();