Change CLI option to allow for multiple arguments

Add basic looping in async runtime
This commit is contained in:
Kenneth Gitere 2020-10-22 15:22:56 +03:00
parent db11e78d8c
commit 1b4c4ee658
2 changed files with 57 additions and 46 deletions

View file

@@ -9,5 +9,5 @@ use structopt::StructOpt;
 pub struct Opts {
     // #[structopt(conflicts_with("links"))]
     /// Url of a web article
-    pub url: Option<String>,
+    pub urls: Vec<String>,
 }

View file

@@ -15,63 +15,74 @@ mod moz_readability;
 use extractor::Extractor;
 fn main() {
     let opt = cli::Opts::from_args();
-    if let Some(url) = opt.url {
+    if !opt.urls.is_empty() {
         println!("Downloading single article");
-        download(url)
+        download(opt.urls);
     }
 }
 
-async fn fetch_url(url: &str) -> String {
+type HTMLResource = (String, String);
+
+async fn fetch_url(url: &str) -> HTMLResource {
     let client = surf::Client::new();
     println!("Fetching...");
     // TODO: Add middleware for following redirects
-    client
-        .get(url)
-        .recv_string()
-        .await
-        .expect("Unable to fetch URL")
+    (
+        url.to_string(),
+        client
+            .get(url)
+            .recv_string()
+            .await
+            .expect("Unable to fetch URL"),
+    )
 }
 
-fn download(url: String) {
+fn download(urls: Vec<String>) {
+    let mut async_url_tasks = Vec::with_capacity(urls.len());
+    for url in urls {
+        async_url_tasks.push(task::spawn(async move { fetch_url(&url).await }));
+    }
     task::block_on(async {
-        let html = fetch_url(&url).await;
-        println!("Extracting");
-        let mut extractor = Extractor::from_html(&html);
-        extractor.extract_content(&url);
-        if extractor.article().is_some() {
-            create_dir("res/")
-                .await
-                .expect("Unable to create res/ output folder");
-            extractor
-                .download_images(&Url::parse(&url).unwrap())
-                .await
-                .expect("Unable to download images");
-            let mut out_file =
-                File::create(format!("{}.epub", extractor.metadata().title())).unwrap();
-            let mut html_buf = Vec::new();
-            extractor
-                .article()
-                .unwrap()
-                .serialize(&mut html_buf)
-                .expect("Unable to serialize");
-            let html_buf = std::str::from_utf8(&html_buf).unwrap();
-            let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
-            if let Some(author) = extractor.metadata().byline() {
-                epub.metadata("author", author).unwrap();
-            }
-            epub.metadata("title", extractor.metadata().title())
-                .unwrap();
-            epub.add_content(EpubContent::new("code.xhtml", html_buf.as_bytes()))
-                .unwrap();
-            for img in extractor.img_urls {
-                let file_path = format!("{}", &img.0);
+        for url_task in async_url_tasks {
+            let (url, html) = url_task.await;
+            println!("Extracting");
+            let mut extractor = Extractor::from_html(&html);
+            extractor.extract_content(&url);
+            if extractor.article().is_some() {
+                create_dir("res/")
+                    .await
+                    .expect("Unable to create res/ output folder");
+                extractor
+                    .download_images(&Url::parse(&url).unwrap())
+                    .await
+                    .expect("Unable to download images");
+                let mut out_file =
+                    File::create(format!("{}.epub", extractor.metadata().title())).unwrap();
+                let mut html_buf = Vec::new();
+                extractor
+                    .article()
+                    .unwrap()
+                    .serialize(&mut html_buf)
+                    .expect("Unable to serialize");
+                let html_buf = std::str::from_utf8(&html_buf).unwrap();
+                let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
+                if let Some(author) = extractor.metadata().byline() {
+                    epub.metadata("author", author).unwrap();
+                }
+                epub.metadata("title", extractor.metadata().title())
+                    .unwrap();
+                epub.add_content(EpubContent::new("code.xhtml", html_buf.as_bytes()))
+                    .unwrap();
+                for img in extractor.img_urls {
+                    let file_path = format!("{}", &img.0);
 
-                let img_buf = File::open(file_path).expect("Can't read file");
-                epub.add_resource(img.0, img_buf, img.1.unwrap()).unwrap();
+                    let img_buf = File::open(file_path).expect("Can't read file");
+                    epub.add_resource(img.0, img_buf, img.1.unwrap()).unwrap();
+                }
+                epub.generate(&mut out_file).unwrap();
+                println!("Cleaning up");
+                remove_dir_all("res/").await.unwrap();
             }
-            epub.generate(&mut out_file).unwrap();
-            println!("Cleaning up");
-            remove_dir_all("res/").await.unwrap();
         }
     })
 }