From 8691b0166f9d046cb2490b6754834fdf87059732 Mon Sep 17 00:00:00 2001 From: Kenneth Gitere Date: Tue, 8 Jun 2021 20:35:52 +0300 Subject: [PATCH] fix: fix panic when unwrapping a base URI chore: add message when downloading articles to a specified output-dir --- src/main.rs | 11 +++++++++++ src/moz_readability/mod.rs | 29 +++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/main.rs b/src/main.rs index dc4787d..e378115 100644 --- a/src/main.rs +++ b/src/main.rs @@ -39,6 +39,16 @@ fn main() { fn run(app_config: AppConfig) { let mut errors = Vec::new(); let mut partial_downloads = Vec::new(); + + if let Some(dir_name) = &app_config.output_directory { + let noun = if app_config.urls.len() > 1 { + "articles" + } else { + "article" + }; + println!("Downloading {} to {}", noun, dir_name); + } + let bar = if app_config.can_disable_progress_bar { ProgressBar::hidden() } else { @@ -50,6 +60,7 @@ fn run(app_config: AppConfig) { enabled_bar.enable_steady_tick(500); enabled_bar }; + let articles = download(&app_config, &bar, &mut partial_downloads, &mut errors); bar.finish_with_message("Downloaded articles"); diff --git a/src/moz_readability/mod.rs b/src/moz_readability/mod.rs index 7549f24..0f4fc66 100644 --- a/src/moz_readability/mod.rs +++ b/src/moz_readability/mod.rs @@ -659,10 +659,24 @@ impl Readability { .map(|node_ref| { let node_attrs = node_ref.attributes.borrow(); let href = node_attrs.get("href").unwrap(); - if href.trim() == "/" { - document_uri.join("/").unwrap() - } else { - Url::parse(href).unwrap() + + match Url::parse(href) { + Ok(url) => url, + Err(e) => match e { + url::ParseError::RelativeUrlWithoutBase => { + match document_uri.join(href) { + Ok(joined_url) => joined_url, + Err(e) => panic!( + "{:} unable to parse url {:?} on element {}", + e, href, &node_ref.name.local + ), + } + } + e => panic!( + "{:} unable to parse url {:?} on element {}", + e, href, &node_ref.name.local + ), + }, } }) .next() @@ -1609,8 +1623,11 @@ impl Readability { // // class name "comment", etc), and turn divs into P tags where they have been // // used inappropriately (as in, where they contain no other block level elements.) let mut elements_to_score: Vec = Vec::new(); - let mut node = self.root_node.select_first("html") - .ok().map(|n| n.as_node().clone()); + let mut node = self + .root_node + .select_first("html") + .ok() + .map(|n| n.as_node().clone()); while let Some(node_ref) = node { let node_elem = node_ref.as_element().unwrap();