Change CLI option to allow for multiple arguments
Add basic looping in async runtime
This commit is contained in:
parent
db11e78d8c
commit
1b4c4ee658
2 changed files with 57 additions and 46 deletions
|
@ -9,5 +9,5 @@ use structopt::StructOpt;
|
||||||
pub struct Opts {
|
pub struct Opts {
|
||||||
// #[structopt(conflicts_with("links"))]
|
// #[structopt(conflicts_with("links"))]
|
||||||
/// Url of a web article
|
/// URLs of web articles
|
||||||
pub url: Option<String>,
|
pub urls: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
23
src/main.rs
23
src/main.rs
|
@ -15,26 +15,36 @@ mod moz_readability;
|
||||||
use extractor::Extractor;
|
use extractor::Extractor;
|
||||||
fn main() {
|
fn main() {
|
||||||
let opt = cli::Opts::from_args();
|
let opt = cli::Opts::from_args();
|
||||||
if let Some(url) = opt.url {
|
if !opt.urls.is_empty() {
|
||||||
println!("Downloading single article");
|
println!("Downloading single article");
|
||||||
download(url)
|
download(opt.urls);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fetch_url(url: &str) -> String {
|
type HTMLResource = (String, String);
|
||||||
|
|
||||||
|
async fn fetch_url(url: &str) -> HTMLResource {
|
||||||
let client = surf::Client::new();
|
let client = surf::Client::new();
|
||||||
println!("Fetching...");
|
println!("Fetching...");
|
||||||
// TODO: Add middleware for following redirects
|
// TODO: Add middleware for following redirects
|
||||||
|
(
|
||||||
|
url.to_string(),
|
||||||
client
|
client
|
||||||
.get(url)
|
.get(url)
|
||||||
.recv_string()
|
.recv_string()
|
||||||
.await
|
.await
|
||||||
.expect("Unable to fetch URL")
|
.expect("Unable to fetch URL"),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn download(url: String) {
|
fn download(urls: Vec<String>) {
|
||||||
|
let mut async_url_tasks = Vec::with_capacity(urls.len());
|
||||||
|
for url in urls {
|
||||||
|
async_url_tasks.push(task::spawn(async move { fetch_url(&url).await }));
|
||||||
|
}
|
||||||
task::block_on(async {
|
task::block_on(async {
|
||||||
let html = fetch_url(&url).await;
|
for url_task in async_url_tasks {
|
||||||
|
let (url, html) = url_task.await;
|
||||||
println!("Extracting");
|
println!("Extracting");
|
||||||
let mut extractor = Extractor::from_html(&html);
|
let mut extractor = Extractor::from_html(&html);
|
||||||
extractor.extract_content(&url);
|
extractor.extract_content(&url);
|
||||||
|
@ -73,5 +83,6 @@ fn download(url: String) {
|
||||||
println!("Cleaning up");
|
println!("Cleaning up");
|
||||||
remove_dir_all("res/").await.unwrap();
|
remove_dir_all("res/").await.unwrap();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
Reference in a new issue