From 9f56c58dd93c738d9afc33ffcf9572b929672c0c Mon Sep 17 00:00:00 2001 From: Kenneth Gitere Date: Sat, 16 May 2020 10:09:44 +0300 Subject: [PATCH] Add simple CLI wrapper --- Cargo.lock | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/cli.rs | 13 +++++ src/main.rs | 44 ++++++++--------- 4 files changed, 170 insertions(+), 21 deletions(-) create mode 100644 src/cli.rs diff --git a/Cargo.lock b/Cargo.lock index f48fe72..fafdfb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15,6 +15,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +dependencies = [ + "winapi 0.3.8", +] + [[package]] name = "async-std" version = "1.5.0" @@ -50,6 +59,17 @@ dependencies = [ "winapi 0.3.8", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi 0.3.8", +] + [[package]] name = "autocfg" version = "0.1.7" @@ -155,6 +175,21 @@ dependencies = [ "time", ] +[[package]] +name = "clap" +version = "2.33.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + [[package]] name = "cloudabi" version = "0.0.3" @@ -523,6 +558,15 @@ dependencies = [ "wasi", ] +[[package]] +name = "heck" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "hermit-abi" version = "0.1.12" @@ -972,6 +1016,7 @@ dependencies = [ 
"epub-builder", "kuchiki", "md5", + "structopt", "surf", "url", ] @@ -1110,6 +1155,32 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "proc-macro-error" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98e9e4b82e0ef281812565ea4751049f1bdcdfccda7d3f459f2e138a40c08678" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f5444ead4e9935abd7f27dc51f7e852a0569ac888096d5ec2499470794e2e53" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "syn-mid", + "version_check", +] + [[package]] name = "proc-macro-hack" version = "0.5.15" @@ -1577,6 +1648,36 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc" +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "structopt" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef" +dependencies = [ + "clap", + "lazy_static 1.4.0", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d239ca4b13aee7a2142e6795cbd69e457665ff8037aed33b3effdc430d2f927a" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "surf" version = "1.0.3" @@ -1610,6 +1711,17 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = 
"syn-mid" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7be3539f6c128a931cf19dcee741c1af532c7fd387baa739c03dd2e96479338a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tempdir" version = "0.3.7" @@ -1631,6 +1743,15 @@ dependencies = [ "utf-8", ] +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + [[package]] name = "thin-slice" version = "0.1.1" @@ -1694,6 +1815,12 @@ dependencies = [ "smallvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" + [[package]] name = "unicode-width" version = "0.1.7" @@ -1747,6 +1874,12 @@ version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fc439f2794e98976c88a2a2dafce96b930fe8010b0a256b3c2199a773933168" +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + [[package]] name = "version_check" version = "0.9.1" diff --git a/Cargo.toml b/Cargo.toml index 867a34a..801a3a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,4 +13,5 @@ epub-builder = "0.4.5" kuchiki = "0.8.0" md5 = "0.7.0" surf = "1.0.3" +structopt = { version = "0.3" } url = "2.1.1" \ No newline at end of file diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..ba0273d --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,13 @@ +use structopt::StructOpt; + +#[derive(Debug, StructOpt)] +#[structopt(name = "paperoni")] +/// Paperoni is an article downloader. +/// +/// It takes a url and downloads the article content from it and +/// saves it to an epub. 
+pub struct Opts { + // #[structopt(conflicts_with("links"))] + /// Url of a web article + pub url: Option<String>, +} diff --git a/src/main.rs b/src/main.rs index d790f9b..6f15e9e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,22 +2,35 @@ use std::fs::File; use async_std::{fs::create_dir, fs::remove_dir_all, task}; use epub_builder::{EpubBuilder, EpubContent, ZipLibrary}; +use structopt::StructOpt; use url::Url; +mod cli; mod extractor; use extractor::Extractor; fn main() { + let opt = cli::Opts::from_args(); + if let Some(url) = opt.url { + println!("Downloading single article"); + download(url) + } +} + +async fn fetch_url(url: &str) -> String { + let client = surf::Client::new(); + println!("Fetching..."); + // TODO: Add middleware for following redirects + client + .get(url) + .recv_string() + .await + .expect("Unable to fetch URL") +} + +fn download(url: String) { task::block_on(async { - let urls = vec![ - "https://saveandrun.com/posts/2020-01-24-generating-mazes-with-haskell-part-1.html", - "https://saveandrun.com/posts/2020-04-05-querying-pacman-with-datalog.html", - "https://blog.hipstermojo.xyz/posts/redis-orm-preface/", - "https://vuejsdevelopers.com/2020/03/31/vue-js-form-composition-api/?utm_campaign=xl5&utm_medium=article&utm_source=vuejsnews#adding-validators", - "https://medium.com/typeforms-engineering-blog/the-beginners-guide-to-oauth-dancing-4b8f3666de10", - "https://dev.to/steelwolf180/full-stack-development-in-django-3768" - ]; - let html = fetch_url(urls[4]).await; + let html = fetch_url(&url).await; let mut extractor = Extractor::from_html(&html); println!("Extracting"); extractor.extract_content(); @@ -25,7 +38,7 @@ fn main() { .await .expect("Unable to create res/ output folder"); extractor - .download_images(&Url::parse(urls[5]).unwrap()) + .download_images(&Url::parse(&url).unwrap()) .await .expect("Unable to download images"); let mut out_file = File::create("out.epub").unwrap(); @@ -51,14 +64,3 @@ fn main() { 
remove_dir_all("res/").await.unwrap(); }) } - -async fn fetch_url(url: &str) -> String { - let client = surf::Client::new(); - println!("Fetching..."); - // TODO: Add middleware for following redirects - client - .get(url) - .recv_string() - .await - .expect("Unable to fetch URL") -}