paperoni/src/main.rs

53 lines
1.4 KiB
Rust
Raw Normal View History

#[macro_use]
extern crate lazy_static;
use async_std::task;
2020-05-02 16:33:45 +01:00
use url::Url;
2020-05-16 08:09:44 +01:00
mod cli;
2021-02-06 09:59:03 +00:00
mod epub;
mod extractor;
2021-02-06 09:59:03 +00:00
/// This module is responsible for async HTTP calls for downloading
/// the HTML content and images
mod http;
mod moz_readability;
2021-02-06 09:59:03 +00:00
use epub::generate_epub;
use http::{download_images, fetch_url};
use extractor::Extractor;
fn main() {
2021-02-06 09:59:03 +00:00
let app_config = cli::cli_init();
2021-02-06 09:59:03 +00:00
if !app_config.urls().is_empty() {
download(app_config.urls().clone());
2020-05-16 08:09:44 +01:00
}
}
fn download(urls: Vec<String>) {
let mut async_url_tasks = Vec::with_capacity(urls.len());
for url in urls {
async_url_tasks.push(task::spawn(async move { fetch_url(&url).await }));
}
2021-02-06 09:59:03 +00:00
task::block_on(async {
for url_task in async_url_tasks {
match url_task.await {
Ok((url, html)) => {
println!("Extracting");
let mut extractor = Extractor::from_html(&html);
extractor.extract_content(&url);
2021-02-06 09:59:03 +00:00
if extractor.article().is_some() {
2021-02-06 09:59:03 +00:00
download_images(&mut extractor, &Url::parse(&url).unwrap())
.await
.expect("Unable to download images");
2021-02-06 09:59:03 +00:00
generate_epub(extractor);
}
}
Err(e) => println!("{}", e),
}
}
2020-05-02 17:25:31 +01:00
})
}