#[macro_use] extern crate lazy_static; use std::fs::File; use async_std::task; use epub_builder::{EpubBuilder, EpubContent, ZipLibrary}; use url::Url; mod cli; mod extractor; mod moz_readability; use extractor::Extractor; fn main() { let app = cli::cli_init(); let arg_matches = app.get_matches(); if let Some(vals) = arg_matches.values_of("urls") { let urls = vals.map(|val| val.to_string()).collect::>(); download(urls); } } type HTMLResource = (String, String); async fn fetch_url(url: &str) -> HTMLResource { let client = surf::Client::new(); println!("Fetching..."); // TODO: Add middleware for following redirects ( url.to_string(), client .get(url) .recv_string() .await .expect("Unable to fetch URL"), ) } fn download(urls: Vec) { let mut async_url_tasks = Vec::with_capacity(urls.len()); for url in urls { async_url_tasks.push(task::spawn(async move { fetch_url(&url).await })); } task::block_on(async { for url_task in async_url_tasks { let (url, html) = url_task.await; println!("Extracting"); let mut extractor = Extractor::from_html(&html); extractor.extract_content(&url); if extractor.article().is_some() { extractor .download_images(&Url::parse(&url).unwrap()) .await .expect("Unable to download images"); let file_name = format!("{}.epub", extractor.metadata().title()); let mut out_file = File::create(&file_name).unwrap(); let mut html_buf = Vec::new(); extractor .article() .unwrap() .serialize(&mut html_buf) .expect("Unable to serialize"); let html_buf = std::str::from_utf8(&html_buf).unwrap(); let html_buf = moz_readability::regexes::REPLACE_SELF_CLOSING_REGEX .replace_all(html_buf, "$tag/>"); let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap(); if let Some(author) = extractor.metadata().byline() { epub.metadata("author", author.replace("&", "&")) .unwrap(); } epub.metadata("title", extractor.metadata().title().replace("&", "&")) .unwrap(); epub.add_content(EpubContent::new("code.xhtml", html_buf.as_bytes())) .unwrap(); for img in extractor.img_urls { let mut file_path = std::env::temp_dir(); file_path.push(&img.0); let img_buf = File::open(&file_path).expect("Can't read file"); epub.add_resource(file_path.file_name().unwrap(), img_buf, img.1.unwrap()) .unwrap(); } epub.generate(&mut out_file).unwrap(); println!("Created {:?}", file_name); } } }) }