Add `url` field to the `Extractor` struct

This commit is contained in:
Kenneth Gitere 2021-04-20 21:06:54 +03:00
parent b217448601
commit 60fb30e8a2
2 changed files with 9 additions and 7 deletions

View file

@ -15,22 +15,24 @@ pub struct Extractor {
article: Option<NodeRef>, article: Option<NodeRef>,
pub img_urls: Vec<ResourceInfo>, pub img_urls: Vec<ResourceInfo>,
readability: Readability, readability: Readability,
pub url: String,
} }
impl Extractor { impl Extractor {
/// Create a new instance of an HTML extractor given an HTML string /// Create a new instance of an HTML extractor given an HTML string
pub fn from_html(html_str: &str) -> Self { pub fn from_html(html_str: &str, url: &str) -> Self {
Extractor { Extractor {
article: None, article: None,
img_urls: Vec::new(), img_urls: Vec::new(),
readability: Readability::new(html_str), readability: Readability::new(html_str),
url: url.to_string(),
} }
} }
/// Locates and extracts the HTML in a document which is determined to be /// Locates and extracts the HTML in a document which is determined to be
/// the source of the content /// the source of the content
pub fn extract_content(&mut self, url: &str) { pub fn extract_content(&mut self) {
self.readability.parse(url); self.readability.parse(&self.url);
if let Some(article_node_ref) = &self.readability.article_node { if let Some(article_node_ref) = &self.readability.article_node {
let template = r#" let template = r#"
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"> <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
@ -157,8 +159,8 @@ mod test {
#[test] #[test]
fn test_extract_img_urls() { fn test_extract_img_urls() {
let mut extractor = Extractor::from_html(TEST_HTML); let mut extractor = Extractor::from_html(TEST_HTML, "http://example.com/");
extractor.extract_content("http://example.com/"); extractor.extract_content();
extractor.extract_img_urls(); extractor.extract_img_urls();
assert!(extractor.img_urls.len() > 0); assert!(extractor.img_urls.len() > 0);

View file

@ -44,9 +44,9 @@ fn download(app_config: AppConfig) {
match fetch_result { match fetch_result {
Ok((url, html)) => { Ok((url, html)) => {
// println!("Extracting"); // println!("Extracting");
let mut extractor = Extractor::from_html(&html); let mut extractor = Extractor::from_html(&html, &url);
bar.set_message("Extracting..."); bar.set_message("Extracting...");
extractor.extract_content(&url); extractor.extract_content();
if extractor.article().is_some() { if extractor.article().is_some() {
extractor.extract_img_urls(); extractor.extract_img_urls();