Change CLI option to allow for multiple arguments

Add basic looping in async runtime
This commit is contained in:
Kenneth Gitere 2020-10-22 15:22:56 +03:00
parent db11e78d8c
commit 1b4c4ee658
2 changed files with 57 additions and 46 deletions

View file

@ -9,5 +9,5 @@ use structopt::StructOpt;
// Command-line options; `main` obtains them through `cli::Opts::from_args()`.
pub struct Opts {
// #[structopt(conflicts_with("links"))]
/// Url of a web article
pub url: Option<String>,
// NOTE(review): this diff view shows the old `url` field above together with
// its replacement below; presumably only `urls` remains after the commit,
// since `main` reads `opt.urls` — confirm against the committed file.
// Zero or more article URLs collected into a vector.
pub urls: Vec<String>,
}

View file

@ -15,26 +15,36 @@ mod moz_readability;
use extractor::Extractor;
// Entry point: parses the command-line options and, when at least one URL was
// supplied, hands the URL list to `download`.
// NOTE(review): old and new lines of the diff are interleaved below; the
// `opt.url` / `download(url)` pair is presumably the removed version.
fn main() {
let opt = cli::Opts::from_args();
if let Some(url) = opt.url {
// Nothing is downloaded when no URLs were passed.
if !opt.urls.is_empty() {
// NOTE(review): the message still says "single article" although a list of
// URLs is now passed on — confirm whether the wording should change.
println!("Downloading single article");
download(url)
download(opt.urls);
}
}
// NOTE(review): the first signature line is presumably the pre-commit version
// (returning only the body); the alias and second signature replace it.
async fn fetch_url(url: &str) -> String {
/// Pair of (requested URL, response body as text) produced by `fetch_url`.
type HTMLResource = (String, String);
/// Fetches `url` with a fresh `surf` client and returns the URL together with
/// the response body read as a string.
///
/// # Panics
///
/// Panics with "Unable to fetch URL" when the request or body read fails,
/// because the result is unwrapped with `expect`.
async fn fetch_url(url: &str) -> HTMLResource {
let client = surf::Client::new();
println!("Fetching...");
// TODO: Add middleware for following redirects
(
// The URL is returned alongside the body so the caller can still pair the two
// after the task that fetched them has finished.
url.to_string(),
client
.get(url)
.recv_string()
.await
.expect("Unable to fetch URL")
.expect("Unable to fetch URL"),
)
}
// Downloads every URL in `urls` and runs the extractor on each fetched page.
// NOTE(review): old and new lines of the diff are interleaved below and part of
// the body is elided at the hunk header, so this description covers only the
// visible lines.
fn download(url: String) {
fn download(urls: Vec<String>) {
// One fetch task is spawned per URL before any of them is awaited; presumably
// this lets the fetches run concurrently — confirm against the runtime in use.
let mut async_url_tasks = Vec::with_capacity(urls.len());
for url in urls {
// `async move` gives each task ownership of its own URL string.
async_url_tasks.push(task::spawn(async move { fetch_url(&url).await }));
}
task::block_on(async {
let html = fetch_url(&url).await;
// Results are awaited in the order the tasks were spawned, so articles are
// processed in the order the URLs were given.
for url_task in async_url_tasks {
let (url, html) = url_task.await;
println!("Extracting");
let mut extractor = Extractor::from_html(&html);
extractor.extract_content(&url);
// (lines between the two hunks are not shown in this diff view)
@ -73,5 +83,6 @@ fn download(url: String) {
println!("Cleaning up");
// NOTE(review): this cleanup appears to sit inside the per-article loop, so
// "res/" would be removed once per article, and `unwrap` panics if the removal
// fails — confirm both are intended.
remove_dir_all("res/").await.unwrap();
}
}
})
}