fix: fix panic when unwrapping a base URI

chore: add message when downloading articles to a specified output-dir
This commit is contained in:
Kenneth Gitere 2021-06-08 20:35:52 +03:00
parent 5fbfb9c806
commit 8691b0166f
2 changed files with 34 additions and 6 deletions

View file

@ -39,6 +39,16 @@ fn main() {
fn run(app_config: AppConfig) {
let mut errors = Vec::new();
let mut partial_downloads = Vec::new();
if let Some(dir_name) = &app_config.output_directory {
let noun = if app_config.urls.len() > 1 {
"articles"
} else {
"article"
};
println!("Downloading {} to {}", noun, dir_name);
}
let bar = if app_config.can_disable_progress_bar {
ProgressBar::hidden()
} else {
@ -50,6 +60,7 @@ fn run(app_config: AppConfig) {
enabled_bar.enable_steady_tick(500);
enabled_bar
};
let articles = download(&app_config, &bar, &mut partial_downloads, &mut errors);
bar.finish_with_message("Downloaded articles");

View file

@ -659,10 +659,24 @@ impl Readability {
.map(|node_ref| {
let node_attrs = node_ref.attributes.borrow();
let href = node_attrs.get("href").unwrap();
if href.trim() == "/" {
document_uri.join("/").unwrap()
} else {
Url::parse(href).unwrap()
match Url::parse(href) {
Ok(url) => url,
Err(e) => match e {
url::ParseError::RelativeUrlWithoutBase => {
match document_uri.join(href) {
Ok(joined_url) => joined_url,
Err(e) => panic!(
"{:} unable to parse url {:?} on element {}",
e, href, &node_ref.name.local
),
}
}
e => panic!(
"{:} unable to parse url {:?} on element {}",
e, href, &node_ref.name.local
),
},
}
})
.next()
@ -1609,8 +1623,11 @@ impl Readability {
// // class name "comment", etc), and turn divs into P tags where they have been
// // used inappropriately (as in, where they contain no other block level elements.)
let mut elements_to_score: Vec<NodeRef> = Vec::new();
let mut node = self.root_node.select_first("html")
.ok().map(|n| n.as_node().clone());
let mut node = self
.root_node
.select_first("html")
.ok()
.map(|n| n.as_node().clone());
while let Some(node_ref) = node {
let node_elem = node_ref.as_element().unwrap();