From 1b4c4ee6581f5be6110dfee7667bd67d1b6c854a Mon Sep 17 00:00:00 2001 From: Kenneth Gitere Date: Thu, 22 Oct 2020 15:22:56 +0300 Subject: [PATCH] Change CLI option to allow for multiple arguments Add basic looping in async runtime --- src/cli.rs | 2 +- src/main.rs | 101 +++++++++++++++++++++++++++++----------------------- 2 files changed, 57 insertions(+), 46 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index ba0273d..e0e12db 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -9,5 +9,5 @@ use structopt::StructOpt; pub struct Opts { // #[structopt(conflicts_with("links"))] /// Url of a web article - pub url: Option<String>, + pub urls: Vec<String>, } diff --git a/src/main.rs b/src/main.rs index 1ea3d62..ea59ca0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,63 +15,74 @@ mod moz_readability; use extractor::Extractor; fn main() { let opt = cli::Opts::from_args(); - if let Some(url) = opt.url { + if !opt.urls.is_empty() { println!("Downloading single article"); - download(url) + download(opt.urls); } } -async fn fetch_url(url: &str) -> String { +type HTMLResource = (String, String); + +async fn fetch_url(url: &str) -> HTMLResource { let client = surf::Client::new(); println!("Fetching..."); // TODO: Add middleware for following redirects - client - .get(url) - .recv_string() - .await - .expect("Unable to fetch URL") + ( + url.to_string(), + client + .get(url) + .recv_string() + .await + .expect("Unable to fetch URL"), + ) } -fn download(url: String) { +fn download(urls: Vec<String>) { + let mut async_url_tasks = Vec::with_capacity(urls.len()); + for url in urls { + async_url_tasks.push(task::spawn(async move { fetch_url(&url).await })); + } task::block_on(async { - let html = fetch_url(&url).await; - println!("Extracting"); - let mut extractor = Extractor::from_html(&html); - extractor.extract_content(&url); - if extractor.article().is_some() { - create_dir("res/") - .await - .expect("Unable to create res/ output folder"); - extractor - .download_images(&Url::parse(&url).unwrap()) - .await 
- .expect("Unable to download images"); - let mut out_file = - File::create(format!("{}.epub", extractor.metadata().title())).unwrap(); - let mut html_buf = Vec::new(); - extractor - .article() - .unwrap() - .serialize(&mut html_buf) - .expect("Unable to serialize"); - let html_buf = std::str::from_utf8(&html_buf).unwrap(); - let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap(); - if let Some(author) = extractor.metadata().byline() { - epub.metadata("author", author).unwrap(); - } - epub.metadata("title", extractor.metadata().title()) - .unwrap(); - epub.add_content(EpubContent::new("code.xhtml", html_buf.as_bytes())) - .unwrap(); - for img in extractor.img_urls { - let file_path = format!("{}", &img.0); + for url_task in async_url_tasks { + let (url, html) = url_task.await; + println!("Extracting"); + let mut extractor = Extractor::from_html(&html); + extractor.extract_content(&url); + if extractor.article().is_some() { + create_dir("res/") + .await + .expect("Unable to create res/ output folder"); + extractor + .download_images(&Url::parse(&url).unwrap()) + .await + .expect("Unable to download images"); + let mut out_file = + File::create(format!("{}.epub", extractor.metadata().title())).unwrap(); + let mut html_buf = Vec::new(); + extractor + .article() + .unwrap() + .serialize(&mut html_buf) + .expect("Unable to serialize"); + let html_buf = std::str::from_utf8(&html_buf).unwrap(); + let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap(); + if let Some(author) = extractor.metadata().byline() { + epub.metadata("author", author).unwrap(); + } + epub.metadata("title", extractor.metadata().title()) + .unwrap(); + epub.add_content(EpubContent::new("code.xhtml", html_buf.as_bytes())) + .unwrap(); + for img in extractor.img_urls { + let file_path = format!("{}", &img.0); - let img_buf = File::open(file_path).expect("Can't read file"); - epub.add_resource(img.0, img_buf, img.1.unwrap()).unwrap(); + let img_buf = 
File::open(file_path).expect("Can't read file"); + epub.add_resource(img.0, img_buf, img.1.unwrap()).unwrap(); + } + epub.generate(&mut out_file).unwrap(); + println!("Cleaning up"); + remove_dir_all("res/").await.unwrap(); } - epub.generate(&mut out_file).unwrap(); - println!("Cleaning up"); - remove_dir_all("res/").await.unwrap(); } }) }