2020-10-12 19:33:01 +01:00
|
|
|
#[macro_use]
|
|
|
|
extern crate lazy_static;
|
|
|
|
|
2020-11-23 06:39:56 +00:00
|
|
|
use async_std::task;
|
2020-05-02 16:33:45 +01:00
|
|
|
use url::Url;
|
2020-04-30 09:05:53 +01:00
|
|
|
|
2020-05-16 08:09:44 +01:00
|
|
|
mod cli;
|
2021-02-06 09:59:03 +00:00
|
|
|
mod epub;
|
2020-05-01 14:17:59 +01:00
|
|
|
mod extractor;
|
2021-02-06 09:59:03 +00:00
|
|
|
/// This module is responsible for async HTTP calls for downloading
|
|
|
|
/// the HTML content and images
|
|
|
|
mod http;
|
2020-08-31 17:30:09 +01:00
|
|
|
mod moz_readability;
|
2020-05-01 14:17:59 +01:00
|
|
|
|
2021-02-06 09:59:03 +00:00
|
|
|
use epub::generate_epub;
|
|
|
|
use http::{download_images, fetch_url};
|
|
|
|
|
2020-05-01 14:17:59 +01:00
|
|
|
use extractor::Extractor;
|
2020-04-30 09:05:53 +01:00
|
|
|
fn main() {
|
2021-02-06 09:59:03 +00:00
|
|
|
let app_config = cli::cli_init();
|
2021-02-01 08:28:07 +00:00
|
|
|
|
2021-02-06 09:59:03 +00:00
|
|
|
if !app_config.urls().is_empty() {
|
|
|
|
download(app_config.urls().clone());
|
2020-05-16 08:09:44 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-22 13:22:56 +01:00
|
|
|
fn download(urls: Vec<String>) {
|
|
|
|
let mut async_url_tasks = Vec::with_capacity(urls.len());
|
|
|
|
for url in urls {
|
2021-01-24 14:49:42 +00:00
|
|
|
async_url_tasks.push(task::spawn(async move { fetch_url(&url).await }));
|
2020-10-22 13:22:56 +01:00
|
|
|
}
|
2021-02-06 09:59:03 +00:00
|
|
|
|
2020-04-30 09:05:53 +01:00
|
|
|
task::block_on(async {
|
2020-10-22 13:22:56 +01:00
|
|
|
for url_task in async_url_tasks {
|
2021-01-24 14:49:42 +00:00
|
|
|
match url_task.await {
|
|
|
|
Ok((url, html)) => {
|
|
|
|
println!("Extracting");
|
|
|
|
let mut extractor = Extractor::from_html(&html);
|
|
|
|
extractor.extract_content(&url);
|
2021-02-06 09:59:03 +00:00
|
|
|
|
2021-01-24 14:49:42 +00:00
|
|
|
if extractor.article().is_some() {
|
2021-02-06 09:59:03 +00:00
|
|
|
download_images(&mut extractor, &Url::parse(&url).unwrap())
|
2021-01-24 14:49:42 +00:00
|
|
|
.await
|
|
|
|
.expect("Unable to download images");
|
2021-02-06 09:59:03 +00:00
|
|
|
generate_epub(extractor);
|
2021-01-24 14:49:42 +00:00
|
|
|
}
|
2020-10-22 13:22:56 +01:00
|
|
|
}
|
2021-01-24 14:49:42 +00:00
|
|
|
Err(e) => println!("{}", e),
|
2020-10-22 10:12:30 +01:00
|
|
|
}
|
2020-05-05 10:24:11 +01:00
|
|
|
}
|
2020-05-02 17:25:31 +01:00
|
|
|
})
|
2020-04-30 09:05:53 +01:00
|
|
|
}
|