Change CLI option to allow for multiple arguments

Add basic looping in async runtime
2020-10-22 15:22:56 +03:00 · 2020-10-22 15:22:56 +03:00 · 1b4c4ee658
commit 1b4c4ee658
parent db11e78d8c
2 changed files with 57 additions and 46 deletions
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -9,5 +9,5 @@ use structopt::StructOpt;
 pub struct Opts {
    // #[structopt(conflicts_with("links"))]
    /// Url of a web article
-    pub url: Option<String>,
+    pub urls: Vec<String>,
 }
--- a/src/main.rs
+++ b/src/main.rs
@@ -15,63 +15,74 @@ mod moz_readability;
 use extractor::Extractor;
 fn main() {
    let opt = cli::Opts::from_args();
-    if let Some(url) = opt.url {
+    if !opt.urls.is_empty() {
        println!("Downloading single article");
-        download(url)
+        download(opt.urls);
    }
 }

-async fn fetch_url(url: &str) -> String {
+type HTMLResource = (String, String);
+
+async fn fetch_url(url: &str) -> HTMLResource {
    let client = surf::Client::new();
    println!("Fetching...");
    // TODO: Add middleware for following redirects
-    client
-        .get(url)
-        .recv_string()
-        .await
-        .expect("Unable to fetch URL")
+    (
+        url.to_string(),
+        client
+            .get(url)
+            .recv_string()
+            .await
+            .expect("Unable to fetch URL"),
+    )
 }

-fn download(url: String) {
+fn download(urls: Vec<String>) {
+    let mut async_url_tasks = Vec::with_capacity(urls.len());
+    for url in urls {
+        async_url_tasks.push(task::spawn(async move { fetch_url(&url).await }));
+    }
    task::block_on(async {
-        let html = fetch_url(&url).await;
-        println!("Extracting");
-        let mut extractor = Extractor::from_html(&html);
-        extractor.extract_content(&url);
-        if extractor.article().is_some() {
-            create_dir("res/")
-                .await
-                .expect("Unable to create res/ output folder");
-            extractor
-                .download_images(&Url::parse(&url).unwrap())
-                .await
-                .expect("Unable to download images");
-            let mut out_file =
-                File::create(format!("{}.epub", extractor.metadata().title())).unwrap();
-            let mut html_buf = Vec::new();
-            extractor
-                .article()
-                .unwrap()
-                .serialize(&mut html_buf)
-                .expect("Unable to serialize");
-            let html_buf = std::str::from_utf8(&html_buf).unwrap();
-            let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
-            if let Some(author) = extractor.metadata().byline() {
-                epub.metadata("author", author).unwrap();
-            }
-            epub.metadata("title", extractor.metadata().title())
-                .unwrap();
-            epub.add_content(EpubContent::new("code.xhtml", html_buf.as_bytes()))
-                .unwrap();
-            for img in extractor.img_urls {
-                let file_path = format!("{}", &img.0);
+        for url_task in async_url_tasks {
+            let (url, html) = url_task.await;
+            println!("Extracting");
+            let mut extractor = Extractor::from_html(&html);
+            extractor.extract_content(&url);
+            if extractor.article().is_some() {
+                create_dir("res/")
+                    .await
+                    .expect("Unable to create res/ output folder");
+                extractor
+                    .download_images(&Url::parse(&url).unwrap())
+                    .await
+                    .expect("Unable to download images");
+                let mut out_file =
+                    File::create(format!("{}.epub", extractor.metadata().title())).unwrap();
+                let mut html_buf = Vec::new();
+                extractor
+                    .article()
+                    .unwrap()
+                    .serialize(&mut html_buf)
+                    .expect("Unable to serialize");
+                let html_buf = std::str::from_utf8(&html_buf).unwrap();
+                let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
+                if let Some(author) = extractor.metadata().byline() {
+                    epub.metadata("author", author).unwrap();
+                }
+                epub.metadata("title", extractor.metadata().title())
+                    .unwrap();
+                epub.add_content(EpubContent::new("code.xhtml", html_buf.as_bytes()))
+                    .unwrap();
+                for img in extractor.img_urls {
+                    let file_path = format!("{}", &img.0);

-                let img_buf = File::open(file_path).expect("Can't read file");
-                epub.add_resource(img.0, img_buf, img.1.unwrap()).unwrap();
+                    let img_buf = File::open(file_path).expect("Can't read file");
+                    epub.add_resource(img.0, img_buf, img.1.unwrap()).unwrap();
+                }
+                epub.generate(&mut out_file).unwrap();
+                println!("Cleaning up");
+                remove_dir_all("res/").await.unwrap();
            }
-            epub.generate(&mut out_file).unwrap();
-            println!("Cleaning up");
-            remove_dir_all("res/").await.unwrap();
        }
    })
 }