diff --git a/.gitignore b/.gitignore index 3ae8faf..8e42494 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target -*.epub \ No newline at end of file +*.epub +*.log \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index be621b1..c047a45 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,7 +150,7 @@ dependencies = [ "fastrand", "futures-lite", "libc", - "log 0.4.11", + "log 0.4.14", "nb-connect", "once_cell", "parking", @@ -195,7 +195,7 @@ dependencies = [ "futures-lite", "gloo-timers", "kv-log-macro", - "log 0.4.11", + "log 0.4.14", "memchr", "num_cpus", "once_cell", @@ -553,6 +553,16 @@ dependencies = [ "syn", ] +[[package]] +name = "ctor" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fbaabec2c953050352311293be5c6aba8e141ba19d6811862b232d6fd020484" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "ctr" version = "0.6.0" @@ -719,6 +729,22 @@ dependencies = [ "miniz_oxide 0.3.7", ] +[[package]] +name = "flexi_logger" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab94b6ac8eb69f1496a6993f26f785b5fd6d99b7416023eb2a6175c0b242b1" +dependencies = [ + "atty", + "chrono", + "glob", + "lazy_static", + "log 0.4.14", + "regex", + "thiserror", + "yansi", +] + [[package]] name = "flume" version = "0.9.2" @@ -914,6 +940,12 @@ version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce" +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + [[package]] name = "gloo-timers" version = "0.2.1" @@ -980,7 +1012,7 @@ version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b" dependencies = [ - "log 0.4.11", + "log 0.4.14", "mac", "markup5ever", "proc-macro2", @@ -1011,7 +1043,7 @@ dependencies = [ "dashmap", "http-types", "isahc", - "log 0.4.11", + "log 0.4.14", ] [[package]] @@ -1087,7 +1119,7 @@ dependencies = [ "flume", "futures-lite", "http", - "log 0.4.11", + "log 0.4.14", "once_cell", "slab", "sluice", @@ -1130,7 +1162,7 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f" dependencies = [ - "log 0.4.11", + "log 0.4.14", ] [[package]] @@ -1182,16 +1214,17 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b" dependencies = [ - "log 0.4.11", + "log 0.4.14", ] [[package]] name = "log" -version = "0.4.11" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", + "value-bag", ] [[package]] @@ -1206,7 +1239,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab" dependencies = [ - "log 0.4.11", + "log 0.4.14", "phf", "phf_codegen", "serde", @@ -1277,7 +1310,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956" dependencies = [ "libc", - "log 0.4.11", + "log 0.4.14", "miow", "ntapi", "winapi", @@ -1410,14 +1443,17 @@ name = "paperoni" version = "0.3.0-alpha1" dependencies = [ "async-std", + "atty", "clap", "comfy-table", "epub-builder", + "flexi_logger", "futures", "html5ever", "indicatif", "kuchiki", "lazy_static", + "log 0.4.14", "md5", "regex", "surf", @@ -1568,7 +1604,7 @@ checksum = "a2a7bc6b2a29e632e45451c941832803a18cce6781db04de8a04696cdca8bde4" dependencies = [ "cfg-if 0.1.10", "libc", - "log 0.4.11", + "log 0.4.14", "wepoll-sys", "winapi", ] @@ -1801,7 +1837,7 @@ dependencies = [ "cssparser", "derive_more", "fxhash", - "log 0.4.11", + "log 0.4.14", "matches", "phf", "phf_codegen", @@ -2112,7 +2148,7 @@ dependencies = [ "futures-util", "http-client", "http-types", - "log 0.4.11", + "log 0.4.14", "mime_guess", "once_cell", "pin-project-lite 0.2.4", @@ -2269,7 +2305,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0987850db3733619253fe60e17cb59b82d37c7e6c0236bb81e4d6b87c879f27" dependencies = [ "cfg-if 0.1.10", - "log 0.4.11", + "log 0.4.14", "pin-project-lite 0.1.11", "tracing-attributes", "tracing-core", @@ -2400,6 +2436,15 @@ dependencies = [ "rand 0.7.3", ] +[[package]] +name = "value-bag" +version = "1.0.0-alpha.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b676010e055c99033117c2343b33a40a30b91fecd6c49055ac9cd2d6c305ab1" +dependencies = [ + "ctor", +] + [[package]] name = "vcpkg" version = "0.2.10" @@ -2460,7 +2505,7 @@ checksum = "f22b422e2a757c35a73774860af8e112bff612ce6cb604224e8e47641a9e4f68" dependencies = [ "bumpalo", "lazy_static", - "log 0.4.11", + "log 0.4.14", "proc-macro2", "quote", "syn", @@ -2549,6 +2594,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "yansi" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc79f4a1e39857fc00c3f662cbf2651c771f00e9c15fe2abc341806bd46bd71" + [[package]] name = "zip" version = "0.5.8" diff --git a/Cargo.toml b/Cargo.toml index b8fce77..7cdfcd4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,14 +13,17 @@ readme = "README.md" [dependencies] async-std = "1.9.0" +atty = "0.2.14" clap = "2.33.3" comfy-table = "2.1.0" epub-builder = "0.4.8" +flexi_logger = "0.17.1" futures = "0.3.14" html5ever = "0.25.1" indicatif = "0.15.0" kuchiki = "0.8.1" lazy_static = "1.4.0" +log = "0.4.14" md5 = "0.7.0" regex = "1.4.5" surf = "2.2.0" diff --git a/src/epub.rs b/src/epub.rs index 36d766f..0b46277 100644 --- a/src/epub.rs +++ b/src/epub.rs @@ -3,6 +3,7 @@ use std::fs::File; use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table}; use epub_builder::{EpubBuilder, EpubContent, ZipLibrary}; use indicatif::{ProgressBar, ProgressStyle}; +use log::{debug, info}; use crate::{ errors::PaperoniError, @@ -19,7 +20,9 @@ pub fn generate_epubs( "{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} epub {pos}/{len:7} {msg:.green}", ); bar.set_style(style); - bar.set_message("Generating epubs"); + if !articles.is_empty() { + bar.set_message("Generating epubs"); + } let mut errors: Vec = Vec::new(); @@ -47,6 +50,7 @@ pub fn generate_epubs( return Err(errors); } }; + debug!("Creating {:?}", name); epub.inline_toc(); articles .iter() @@ -62,7 +66,7 @@ pub fn generate_epubs( EpubContent::new(format!("article_{}.xhtml", idx), html_str.as_bytes()) .title(replace_metadata_value(section_name)), )?; - + info!("Adding images for {:?}", name); article.img_urls.iter().for_each(|img| { // TODO: Add error handling and return errors as a vec let mut file_path = std::env::temp_dir(); @@ -76,6 +80,7 @@ pub fn generate_epubs( ) .unwrap(); }); + info!("Added images for {:?}", name); Ok(()) }; if let Err(mut error) = article_result() { @@ -98,6 +103,7 @@ pub fn generate_epubs( } bar.finish_with_message("Generated epub\n"); + debug!("Created {:?}", name); println!("Created {:?}", name); } None => { @@ -119,6 +125,7 @@ pub fn generate_epubs( .replace("/", " ") .replace("\\", " ") ); + debug!("Creating {:?}", file_name); let mut out_file = File::create(&file_name).unwrap(); let mut html_buf = Vec::new(); extractor::serialize_to_xhtml(article.article(), &mut html_buf) @@ -145,7 +152,7 @@ pub fn generate_epubs( successful_articles_table.add_row(vec![article.metadata().title()]); - // println!("Created {:?}", file_name); + debug!("Created {:?}", file_name); Ok(()) }; if let Err(mut error) = result() { diff --git a/src/http.rs b/src/http.rs index 3dc1e42..9bdaa42 100644 --- a/src/http.rs +++ b/src/http.rs @@ -2,6 +2,7 @@ use async_std::io::prelude::*; use async_std::{fs::File, stream}; use futures::StreamExt; use indicatif::ProgressBar; +use log::{debug, info}; use url::Url; use crate::{errors::ErrorKind, errors::PaperoniError, extractor::Extractor}; @@ -10,7 +11,7 @@ type HTMLResource = (String, String); pub async fn fetch_html(url: &str) -> Result { let client = surf::Client::new(); - // println!("Fetching..."); + debug!("Fetching {}", url); let process_request = async { let mut redirect_count: u8 = 0; @@ -23,10 +24,19 @@ pub async fn fetch_html(url: &str) -> Result { if res.status().is_redirection() { if let Some(location) = res.header(surf::http::headers::LOCATION) { match Url::parse(location.last().as_str()) { - Ok(valid_url) => url = valid_url, + Ok(valid_url) => { + info!("Redirecting {} to {}", url, valid_url); + url = valid_url + } Err(e) => match e { url::ParseError::RelativeUrlWithoutBase => { - url = base_url.join(location.last().as_str())? + match base_url.join(location.last().as_str()) { + Ok(joined_url) => { + info!("Redirecting {} to {}", url, joined_url); + url = joined_url; + } + Err(e) => return Err(e.into()), + } } e => return Err(e.into()), }, @@ -35,6 +45,7 @@ pub async fn fetch_html(url: &str) -> Result { } else if res.status().is_success() { if let Some(mime) = res.content_type() { if mime.essence() == "text/html" { + debug!("Successfully fetched {}", url); return Ok((url.to_string(), res.body_string().await?)); } else { let msg = format!( @@ -67,7 +78,11 @@ pub async fn download_images( bar: &ProgressBar, ) -> Result<(), Vec> { if extractor.img_urls.len() > 0 { - // println!("Downloading images..."); + debug!( + "Downloading {} images for {}", + extractor.img_urls.len(), + article_origin + ); } let img_count = extractor.img_urls.len(); diff --git a/src/main.rs b/src/main.rs index 98fa3a6..3eee69e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,6 +7,7 @@ use comfy_table::presets::{UTF8_FULL, UTF8_HORIZONTAL_BORDERS_ONLY}; use comfy_table::{Attribute, Cell, CellAlignment, ContentArrangement, Table}; use futures::stream::StreamExt; use indicatif::{ProgressBar, ProgressStyle}; +use log::{debug, warn}; use url::Url; mod cli; @@ -27,6 +28,13 @@ fn main() { let app_config = cli::cli_init(); if !app_config.urls().is_empty() { + match flexi_logger::Logger::with_str("paperoni=debug") + .log_to_file() + .start() + { + Ok(_) => (), + Err(e) => eprintln!("Unable to start logger!\n{}", e), + } download(app_config); } } @@ -46,7 +54,7 @@ fn download(app_config: AppConfig) { while let Some(fetch_result) = responses.next().await { match fetch_result { Ok((url, html)) => { - // println!("Extracting"); + debug!("Extracting {}", &url); let mut extractor = Extractor::from_html(&html, &url); bar.set_message("Extracting..."); match extractor.extract_content() { @@ -56,7 +64,7 @@ fn download(app_config: AppConfig) { download_images(&mut extractor, &Url::parse(&url).unwrap(), &bar) .await { - eprintln!( + warn!( "{} image{} failed to download for {}", img_errors.len(), if img_errors.len() > 1 { "s" } else { "" }, @@ -78,6 +86,7 @@ fn download(app_config: AppConfig) { articles }); bar.finish_with_message("Downloaded articles"); + let mut succesful_articles_table = Table::new(); succesful_articles_table .load_preset(UTF8_FULL) diff --git a/src/moz_readability/mod.rs b/src/moz_readability/mod.rs index c3ab1d2..38236d3 100644 --- a/src/moz_readability/mod.rs +++ b/src/moz_readability/mod.rs @@ -7,6 +7,7 @@ use kuchiki::{ traits::*, NodeData, NodeRef, }; +use log::info; use url::Url; use crate::errors::{ErrorKind, PaperoniError}; @@ -1587,14 +1588,12 @@ impl Readability { /// Using a variety of metrics (content score, classname, element types), find the content that is most likely to be the stuff /// a user wants to read. Then return it wrapped up in a div. fn grab_article(&mut self) -> Result<(), PaperoniError> { - // TODO: Add logging for this - // println!("Grabbing article"); + info!("Grabbing article {:?}", self.metadata.title); // var doc = this._doc; // var isPaging = (page !== null ? true: false); // page = page ? page : this._doc.body; let page = self.root_node.select_first("body"); if page.is_err() { - // TODO:Have error logging for this return Err(ErrorKind::ReadabilityError("Document has no ".into()).into()); } let page = page.unwrap(); @@ -2114,6 +2113,7 @@ impl Readability { false }); self.article_node = Some(article_content); + info!("Successfully grabbed article {:?}", self.metadata.title); return Ok(()); } }