Add simple CLI wrapper

This commit is contained in:
Kenneth Gitere 2020-05-16 10:09:44 +03:00
parent c30d5f732e
commit 9f56c58dd9
4 changed files with 170 additions and 21 deletions

133
Cargo.lock generated
View file

@ -15,6 +15,15 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "ansi_term"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
dependencies = [
"winapi 0.3.8",
]
[[package]] [[package]]
name = "async-std" name = "async-std"
version = "1.5.0" version = "1.5.0"
@ -50,6 +59,17 @@ dependencies = [
"winapi 0.3.8", "winapi 0.3.8",
] ]
[[package]]
name = "atty"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
dependencies = [
"hermit-abi",
"libc",
"winapi 0.3.8",
]
[[package]] [[package]]
name = "autocfg" name = "autocfg"
version = "0.1.7" version = "0.1.7"
@ -155,6 +175,21 @@ dependencies = [
"time", "time",
] ]
[[package]]
name = "clap"
version = "2.33.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
dependencies = [
"ansi_term",
"atty",
"bitflags",
"strsim",
"textwrap",
"unicode-width",
"vec_map",
]
[[package]] [[package]]
name = "cloudabi" name = "cloudabi"
version = "0.0.3" version = "0.0.3"
@ -523,6 +558,15 @@ dependencies = [
"wasi", "wasi",
] ]
[[package]]
name = "heck"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
dependencies = [
"unicode-segmentation",
]
[[package]] [[package]]
name = "hermit-abi" name = "hermit-abi"
version = "0.1.12" version = "0.1.12"
@ -972,6 +1016,7 @@ dependencies = [
"epub-builder", "epub-builder",
"kuchiki", "kuchiki",
"md5", "md5",
"structopt",
"surf", "surf",
"url", "url",
] ]
@ -1110,6 +1155,32 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "proc-macro-error"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98e9e4b82e0ef281812565ea4751049f1bdcdfccda7d3f459f2e138a40c08678"
dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote",
"syn",
"version_check",
]
[[package]]
name = "proc-macro-error-attr"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f5444ead4e9935abd7f27dc51f7e852a0569ac888096d5ec2499470794e2e53"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn-mid",
"version_check",
]
[[package]] [[package]]
name = "proc-macro-hack" name = "proc-macro-hack"
version = "0.5.15" version = "0.5.15"
@ -1577,6 +1648,36 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc" checksum = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc"
[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "structopt"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef"
dependencies = [
"clap",
"lazy_static 1.4.0",
"structopt-derive",
]
[[package]]
name = "structopt-derive"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d239ca4b13aee7a2142e6795cbd69e457665ff8037aed33b3effdc430d2f927a"
dependencies = [
"heck",
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "surf" name = "surf"
version = "1.0.3" version = "1.0.3"
@ -1610,6 +1711,17 @@ dependencies = [
"unicode-xid", "unicode-xid",
] ]
[[package]]
name = "syn-mid"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7be3539f6c128a931cf19dcee741c1af532c7fd387baa739c03dd2e96479338a"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "tempdir" name = "tempdir"
version = "0.3.7" version = "0.3.7"
@ -1631,6 +1743,15 @@ dependencies = [
"utf-8", "utf-8",
] ]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]] [[package]]
name = "thin-slice" name = "thin-slice"
version = "0.1.1" version = "0.1.1"
@ -1694,6 +1815,12 @@ dependencies = [
"smallvec", "smallvec",
] ]
[[package]]
name = "unicode-segmentation"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0"
[[package]] [[package]]
name = "unicode-width" name = "unicode-width"
version = "0.1.7" version = "0.1.7"
@ -1747,6 +1874,12 @@ version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fc439f2794e98976c88a2a2dafce96b930fe8010b0a256b3c2199a773933168" checksum = "3fc439f2794e98976c88a2a2dafce96b930fe8010b0a256b3c2199a773933168"
[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.1" version = "0.9.1"

View file

@ -13,4 +13,5 @@ epub-builder = "0.4.5"
kuchiki = "0.8.0" kuchiki = "0.8.0"
md5 = "0.7.0" md5 = "0.7.0"
surf = "1.0.3" surf = "1.0.3"
structopt = { version = "0.3" }
url = "2.1.1" url = "2.1.1"

13
src/cli.rs Normal file
View file

@ -0,0 +1,13 @@
use structopt::StructOpt;
// Command-line options for the `paperoni` binary. The `StructOpt` derive
// builds the argument parser from this struct, and the `///` doc comments
// below are used by structopt as the generated help text, so treat them as
// user-facing strings rather than ordinary comments.
#[derive(Debug, StructOpt)]
#[structopt(name = "paperoni")]
/// Paperoni is an article downloader.
///
/// It takes a url and downloads the article content from it and
/// saves it to an epub.
pub struct Opts {
// NOTE(review): the attribute below is disabled; presumably it is kept for a
// future `links` option that does not exist yet — confirm or remove.
// #[structopt(conflicts_with("links"))]
/// Url of a web article
// No `short`/`long` attribute, so structopt treats this as a positional
// argument; `Option` makes it optional (`None` when no URL is given).
pub url: Option<String>,
}

View file

@ -2,22 +2,35 @@ use std::fs::File;
use async_std::{fs::create_dir, fs::remove_dir_all, task}; use async_std::{fs::create_dir, fs::remove_dir_all, task};
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary}; use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
use structopt::StructOpt;
use url::Url; use url::Url;
mod cli;
mod extractor; mod extractor;
use extractor::Extractor; use extractor::Extractor;
fn main() { fn main() {
let opt = cli::Opts::from_args();
if let Some(url) = opt.url {
println!("Downloading single article");
download(url)
}
}
/// Fetches `url` with a fresh `surf` client and returns the response body as
/// a `String`.
///
/// # Panics
///
/// Panics with "Unable to fetch URL" if the request or reading the body
/// fails.
async fn fetch_url(url: &str) -> String {
    let http = surf::Client::new();
    println!("Fetching...");
    // TODO: Add middleware for following redirects
    let body = http.get(url).recv_string().await;
    body.expect("Unable to fetch URL")
}
fn download(url: String) {
task::block_on(async { task::block_on(async {
let urls = vec![ let html = fetch_url(&url).await;
"https://saveandrun.com/posts/2020-01-24-generating-mazes-with-haskell-part-1.html",
"https://saveandrun.com/posts/2020-04-05-querying-pacman-with-datalog.html",
"https://blog.hipstermojo.xyz/posts/redis-orm-preface/",
"https://vuejsdevelopers.com/2020/03/31/vue-js-form-composition-api/?utm_campaign=xl5&utm_medium=article&utm_source=vuejsnews#adding-validators",
"https://medium.com/typeforms-engineering-blog/the-beginners-guide-to-oauth-dancing-4b8f3666de10",
"https://dev.to/steelwolf180/full-stack-development-in-django-3768"
];
let html = fetch_url(urls[4]).await;
let mut extractor = Extractor::from_html(&html); let mut extractor = Extractor::from_html(&html);
println!("Extracting"); println!("Extracting");
extractor.extract_content(); extractor.extract_content();
@ -25,7 +38,7 @@ fn main() {
.await .await
.expect("Unable to create res/ output folder"); .expect("Unable to create res/ output folder");
extractor extractor
.download_images(&Url::parse(urls[5]).unwrap()) .download_images(&Url::parse(&url).unwrap())
.await .await
.expect("Unable to download images"); .expect("Unable to download images");
let mut out_file = File::create("out.epub").unwrap(); let mut out_file = File::create("out.epub").unwrap();
@ -51,14 +64,3 @@ fn main() {
remove_dir_all("res/").await.unwrap(); remove_dir_all("res/").await.unwrap();
}) })
} }
/// Fetches `url` with a fresh `surf` client and returns the response body as
/// a `String`.
///
/// # Panics
///
/// Panics with "Unable to fetch URL" if the request or reading the body
/// fails.
async fn fetch_url(url: &str) -> String {
    let http = surf::Client::new();
    println!("Fetching...");
    // TODO: Add middleware for following redirects
    let body = http.get(url).recv_string().await;
    body.expect("Unable to fetch URL")
}