Add logging configured to send to a file by default

This commit is contained in:
Kenneth Gitere 2021-04-24 13:54:47 +03:00
parent c0323a6ae4
commit 910c45abf7
7 changed files with 116 additions and 30 deletions

1
.gitignore vendored
View file

@ -1,2 +1,3 @@
/target /target
*.epub *.epub
*.log

85
Cargo.lock generated
View file

@ -150,7 +150,7 @@ dependencies = [
"fastrand", "fastrand",
"futures-lite", "futures-lite",
"libc", "libc",
"log 0.4.11", "log 0.4.14",
"nb-connect", "nb-connect",
"once_cell", "once_cell",
"parking", "parking",
@ -195,7 +195,7 @@ dependencies = [
"futures-lite", "futures-lite",
"gloo-timers", "gloo-timers",
"kv-log-macro", "kv-log-macro",
"log 0.4.11", "log 0.4.14",
"memchr", "memchr",
"num_cpus", "num_cpus",
"once_cell", "once_cell",
@ -553,6 +553,16 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "ctor"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fbaabec2c953050352311293be5c6aba8e141ba19d6811862b232d6fd020484"
dependencies = [
"quote",
"syn",
]
[[package]] [[package]]
name = "ctr" name = "ctr"
version = "0.6.0" version = "0.6.0"
@ -719,6 +729,22 @@ dependencies = [
"miniz_oxide 0.3.7", "miniz_oxide 0.3.7",
] ]
[[package]]
name = "flexi_logger"
version = "0.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ab94b6ac8eb69f1496a6993f26f785b5fd6d99b7416023eb2a6175c0b242b1"
dependencies = [
"atty",
"chrono",
"glob",
"lazy_static",
"log 0.4.14",
"regex",
"thiserror",
"yansi",
]
[[package]] [[package]]
name = "flume" name = "flume"
version = "0.9.2" version = "0.9.2"
@ -914,6 +940,12 @@ version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce" checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce"
[[package]]
name = "glob"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]] [[package]]
name = "gloo-timers" name = "gloo-timers"
version = "0.2.1" version = "0.2.1"
@ -980,7 +1012,7 @@ version = "0.25.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b" checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b"
dependencies = [ dependencies = [
"log 0.4.11", "log 0.4.14",
"mac", "mac",
"markup5ever", "markup5ever",
"proc-macro2", "proc-macro2",
@ -1011,7 +1043,7 @@ dependencies = [
"dashmap", "dashmap",
"http-types", "http-types",
"isahc", "isahc",
"log 0.4.11", "log 0.4.14",
] ]
[[package]] [[package]]
@ -1087,7 +1119,7 @@ dependencies = [
"flume", "flume",
"futures-lite", "futures-lite",
"http", "http",
"log 0.4.11", "log 0.4.14",
"once_cell", "once_cell",
"slab", "slab",
"sluice", "sluice",
@ -1130,7 +1162,7 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f" checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f"
dependencies = [ dependencies = [
"log 0.4.11", "log 0.4.14",
] ]
[[package]] [[package]]
@ -1182,16 +1214,17 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b" checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
dependencies = [ dependencies = [
"log 0.4.11", "log 0.4.14",
] ]
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.11" version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
dependencies = [ dependencies = [
"cfg-if 0.1.10", "cfg-if 1.0.0",
"value-bag",
] ]
[[package]] [[package]]
@ -1206,7 +1239,7 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab" checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab"
dependencies = [ dependencies = [
"log 0.4.11", "log 0.4.14",
"phf", "phf",
"phf_codegen", "phf_codegen",
"serde", "serde",
@ -1277,7 +1310,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956" checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956"
dependencies = [ dependencies = [
"libc", "libc",
"log 0.4.11", "log 0.4.14",
"miow", "miow",
"ntapi", "ntapi",
"winapi", "winapi",
@ -1410,14 +1443,17 @@ name = "paperoni"
version = "0.3.0-alpha1" version = "0.3.0-alpha1"
dependencies = [ dependencies = [
"async-std", "async-std",
"atty",
"clap", "clap",
"comfy-table", "comfy-table",
"epub-builder", "epub-builder",
"flexi_logger",
"futures", "futures",
"html5ever", "html5ever",
"indicatif", "indicatif",
"kuchiki", "kuchiki",
"lazy_static", "lazy_static",
"log 0.4.14",
"md5", "md5",
"regex", "regex",
"surf", "surf",
@ -1568,7 +1604,7 @@ checksum = "a2a7bc6b2a29e632e45451c941832803a18cce6781db04de8a04696cdca8bde4"
dependencies = [ dependencies = [
"cfg-if 0.1.10", "cfg-if 0.1.10",
"libc", "libc",
"log 0.4.11", "log 0.4.14",
"wepoll-sys", "wepoll-sys",
"winapi", "winapi",
] ]
@ -1801,7 +1837,7 @@ dependencies = [
"cssparser", "cssparser",
"derive_more", "derive_more",
"fxhash", "fxhash",
"log 0.4.11", "log 0.4.14",
"matches", "matches",
"phf", "phf",
"phf_codegen", "phf_codegen",
@ -2112,7 +2148,7 @@ dependencies = [
"futures-util", "futures-util",
"http-client", "http-client",
"http-types", "http-types",
"log 0.4.11", "log 0.4.14",
"mime_guess", "mime_guess",
"once_cell", "once_cell",
"pin-project-lite 0.2.4", "pin-project-lite 0.2.4",
@ -2269,7 +2305,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0987850db3733619253fe60e17cb59b82d37c7e6c0236bb81e4d6b87c879f27" checksum = "b0987850db3733619253fe60e17cb59b82d37c7e6c0236bb81e4d6b87c879f27"
dependencies = [ dependencies = [
"cfg-if 0.1.10", "cfg-if 0.1.10",
"log 0.4.11", "log 0.4.14",
"pin-project-lite 0.1.11", "pin-project-lite 0.1.11",
"tracing-attributes", "tracing-attributes",
"tracing-core", "tracing-core",
@ -2400,6 +2436,15 @@ dependencies = [
"rand 0.7.3", "rand 0.7.3",
] ]
[[package]]
name = "value-bag"
version = "1.0.0-alpha.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b676010e055c99033117c2343b33a40a30b91fecd6c49055ac9cd2d6c305ab1"
dependencies = [
"ctor",
]
[[package]] [[package]]
name = "vcpkg" name = "vcpkg"
version = "0.2.10" version = "0.2.10"
@ -2460,7 +2505,7 @@ checksum = "f22b422e2a757c35a73774860af8e112bff612ce6cb604224e8e47641a9e4f68"
dependencies = [ dependencies = [
"bumpalo", "bumpalo",
"lazy_static", "lazy_static",
"log 0.4.11", "log 0.4.14",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn", "syn",
@ -2549,6 +2594,12 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "yansi"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fc79f4a1e39857fc00c3f662cbf2651c771f00e9c15fe2abc341806bd46bd71"
[[package]] [[package]]
name = "zip" name = "zip"
version = "0.5.8" version = "0.5.8"

View file

@ -13,14 +13,17 @@ readme = "README.md"
[dependencies] [dependencies]
async-std = "1.9.0" async-std = "1.9.0"
atty = "0.2.14"
clap = "2.33.3" clap = "2.33.3"
comfy-table = "2.1.0" comfy-table = "2.1.0"
epub-builder = "0.4.8" epub-builder = "0.4.8"
flexi_logger = "0.17.1"
futures = "0.3.14" futures = "0.3.14"
html5ever = "0.25.1" html5ever = "0.25.1"
indicatif = "0.15.0" indicatif = "0.15.0"
kuchiki = "0.8.1" kuchiki = "0.8.1"
lazy_static = "1.4.0" lazy_static = "1.4.0"
log = "0.4.14"
md5 = "0.7.0" md5 = "0.7.0"
regex = "1.4.5" regex = "1.4.5"
surf = "2.2.0" surf = "2.2.0"

View file

@ -3,6 +3,7 @@ use std::fs::File;
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table}; use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary}; use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
use indicatif::{ProgressBar, ProgressStyle}; use indicatif::{ProgressBar, ProgressStyle};
use log::{debug, info};
use crate::{ use crate::{
errors::PaperoniError, errors::PaperoniError,
@ -19,7 +20,9 @@ pub fn generate_epubs(
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} epub {pos}/{len:7} {msg:.green}", "{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} epub {pos}/{len:7} {msg:.green}",
); );
bar.set_style(style); bar.set_style(style);
bar.set_message("Generating epubs"); if !articles.is_empty() {
bar.set_message("Generating epubs");
}
let mut errors: Vec<PaperoniError> = Vec::new(); let mut errors: Vec<PaperoniError> = Vec::new();
@ -47,6 +50,7 @@ pub fn generate_epubs(
return Err(errors); return Err(errors);
} }
}; };
debug!("Creating {:?}", name);
epub.inline_toc(); epub.inline_toc();
articles articles
.iter() .iter()
@ -62,7 +66,7 @@ pub fn generate_epubs(
EpubContent::new(format!("article_{}.xhtml", idx), html_str.as_bytes()) EpubContent::new(format!("article_{}.xhtml", idx), html_str.as_bytes())
.title(replace_metadata_value(section_name)), .title(replace_metadata_value(section_name)),
)?; )?;
info!("Adding images for {:?}", name);
article.img_urls.iter().for_each(|img| { article.img_urls.iter().for_each(|img| {
// TODO: Add error handling and return errors as a vec // TODO: Add error handling and return errors as a vec
let mut file_path = std::env::temp_dir(); let mut file_path = std::env::temp_dir();
@ -76,6 +80,7 @@ pub fn generate_epubs(
) )
.unwrap(); .unwrap();
}); });
info!("Added images for {:?}", name);
Ok(()) Ok(())
}; };
if let Err(mut error) = article_result() { if let Err(mut error) = article_result() {
@ -98,6 +103,7 @@ pub fn generate_epubs(
} }
bar.finish_with_message("Generated epub\n"); bar.finish_with_message("Generated epub\n");
debug!("Created {:?}", name);
println!("Created {:?}", name); println!("Created {:?}", name);
} }
None => { None => {
@ -119,6 +125,7 @@ pub fn generate_epubs(
.replace("/", " ") .replace("/", " ")
.replace("\\", " ") .replace("\\", " ")
); );
debug!("Creating {:?}", file_name);
let mut out_file = File::create(&file_name).unwrap(); let mut out_file = File::create(&file_name).unwrap();
let mut html_buf = Vec::new(); let mut html_buf = Vec::new();
extractor::serialize_to_xhtml(article.article(), &mut html_buf) extractor::serialize_to_xhtml(article.article(), &mut html_buf)
@ -145,7 +152,7 @@ pub fn generate_epubs(
successful_articles_table.add_row(vec![article.metadata().title()]); successful_articles_table.add_row(vec![article.metadata().title()]);
// println!("Created {:?}", file_name); debug!("Created {:?}", file_name);
Ok(()) Ok(())
}; };
if let Err(mut error) = result() { if let Err(mut error) = result() {

View file

@ -2,6 +2,7 @@ use async_std::io::prelude::*;
use async_std::{fs::File, stream}; use async_std::{fs::File, stream};
use futures::StreamExt; use futures::StreamExt;
use indicatif::ProgressBar; use indicatif::ProgressBar;
use log::{debug, info};
use url::Url; use url::Url;
use crate::{errors::ErrorKind, errors::PaperoniError, extractor::Extractor}; use crate::{errors::ErrorKind, errors::PaperoniError, extractor::Extractor};
@ -10,7 +11,7 @@ type HTMLResource = (String, String);
pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> { pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
let client = surf::Client::new(); let client = surf::Client::new();
// println!("Fetching..."); debug!("Fetching {}", url);
let process_request = async { let process_request = async {
let mut redirect_count: u8 = 0; let mut redirect_count: u8 = 0;
@ -23,10 +24,19 @@ pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
if res.status().is_redirection() { if res.status().is_redirection() {
if let Some(location) = res.header(surf::http::headers::LOCATION) { if let Some(location) = res.header(surf::http::headers::LOCATION) {
match Url::parse(location.last().as_str()) { match Url::parse(location.last().as_str()) {
Ok(valid_url) => url = valid_url, Ok(valid_url) => {
info!("Redirecting {} to {}", url, valid_url);
url = valid_url
}
Err(e) => match e { Err(e) => match e {
url::ParseError::RelativeUrlWithoutBase => { url::ParseError::RelativeUrlWithoutBase => {
url = base_url.join(location.last().as_str())? match base_url.join(location.last().as_str()) {
Ok(joined_url) => {
info!("Redirecting {} to {}", url, joined_url);
url = joined_url;
}
Err(e) => return Err(e.into()),
}
} }
e => return Err(e.into()), e => return Err(e.into()),
}, },
@ -35,6 +45,7 @@ pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
} else if res.status().is_success() { } else if res.status().is_success() {
if let Some(mime) = res.content_type() { if let Some(mime) = res.content_type() {
if mime.essence() == "text/html" { if mime.essence() == "text/html" {
debug!("Successfully fetched {}", url);
return Ok((url.to_string(), res.body_string().await?)); return Ok((url.to_string(), res.body_string().await?));
} else { } else {
let msg = format!( let msg = format!(
@ -67,7 +78,11 @@ pub async fn download_images(
bar: &ProgressBar, bar: &ProgressBar,
) -> Result<(), Vec<PaperoniError>> { ) -> Result<(), Vec<PaperoniError>> {
if extractor.img_urls.len() > 0 { if extractor.img_urls.len() > 0 {
// println!("Downloading images..."); debug!(
"Downloading {} images for {}",
extractor.img_urls.len(),
article_origin
);
} }
let img_count = extractor.img_urls.len(); let img_count = extractor.img_urls.len();

View file

@ -7,6 +7,7 @@ use comfy_table::presets::{UTF8_FULL, UTF8_HORIZONTAL_BORDERS_ONLY};
use comfy_table::{Attribute, Cell, CellAlignment, ContentArrangement, Table}; use comfy_table::{Attribute, Cell, CellAlignment, ContentArrangement, Table};
use futures::stream::StreamExt; use futures::stream::StreamExt;
use indicatif::{ProgressBar, ProgressStyle}; use indicatif::{ProgressBar, ProgressStyle};
use log::{debug, warn};
use url::Url; use url::Url;
mod cli; mod cli;
@ -27,6 +28,13 @@ fn main() {
let app_config = cli::cli_init(); let app_config = cli::cli_init();
if !app_config.urls().is_empty() { if !app_config.urls().is_empty() {
match flexi_logger::Logger::with_str("paperoni=debug")
.log_to_file()
.start()
{
Ok(_) => (),
Err(e) => eprintln!("Unable to start logger!\n{}", e),
}
download(app_config); download(app_config);
} }
} }
@ -46,7 +54,7 @@ fn download(app_config: AppConfig) {
while let Some(fetch_result) = responses.next().await { while let Some(fetch_result) = responses.next().await {
match fetch_result { match fetch_result {
Ok((url, html)) => { Ok((url, html)) => {
// println!("Extracting"); debug!("Extracting {}", &url);
let mut extractor = Extractor::from_html(&html, &url); let mut extractor = Extractor::from_html(&html, &url);
bar.set_message("Extracting..."); bar.set_message("Extracting...");
match extractor.extract_content() { match extractor.extract_content() {
@ -56,7 +64,7 @@ fn download(app_config: AppConfig) {
download_images(&mut extractor, &Url::parse(&url).unwrap(), &bar) download_images(&mut extractor, &Url::parse(&url).unwrap(), &bar)
.await .await
{ {
eprintln!( warn!(
"{} image{} failed to download for {}", "{} image{} failed to download for {}",
img_errors.len(), img_errors.len(),
if img_errors.len() > 1 { "s" } else { "" }, if img_errors.len() > 1 { "s" } else { "" },
@ -78,6 +86,7 @@ fn download(app_config: AppConfig) {
articles articles
}); });
bar.finish_with_message("Downloaded articles"); bar.finish_with_message("Downloaded articles");
let mut succesful_articles_table = Table::new(); let mut succesful_articles_table = Table::new();
succesful_articles_table succesful_articles_table
.load_preset(UTF8_FULL) .load_preset(UTF8_FULL)

View file

@ -7,6 +7,7 @@ use kuchiki::{
traits::*, traits::*,
NodeData, NodeRef, NodeData, NodeRef,
}; };
use log::info;
use url::Url; use url::Url;
use crate::errors::{ErrorKind, PaperoniError}; use crate::errors::{ErrorKind, PaperoniError};
@ -1587,14 +1588,12 @@ impl Readability {
/// Using a variety of metrics (content score, classname, element types), find the content that is most likely to be the stuff /// Using a variety of metrics (content score, classname, element types), find the content that is most likely to be the stuff
/// a user wants to read. Then return it wrapped up in a div. /// a user wants to read. Then return it wrapped up in a div.
fn grab_article(&mut self) -> Result<(), PaperoniError> { fn grab_article(&mut self) -> Result<(), PaperoniError> {
// TODO: Add logging for this info!("Grabbing article {:?}", self.metadata.title);
// println!("Grabbing article");
// var doc = this._doc; // var doc = this._doc;
// var isPaging = (page !== null ? true: false); // var isPaging = (page !== null ? true: false);
// page = page ? page : this._doc.body; // page = page ? page : this._doc.body;
let page = self.root_node.select_first("body"); let page = self.root_node.select_first("body");
if page.is_err() { if page.is_err() {
// TODO:Have error logging for this
return Err(ErrorKind::ReadabilityError("Document has no <body>".into()).into()); return Err(ErrorKind::ReadabilityError("Document has no <body>".into()).into());
} }
let page = page.unwrap(); let page = page.unwrap();
@ -2114,6 +2113,7 @@ impl Readability {
false false
}); });
self.article_node = Some(article_content); self.article_node = Some(article_content);
info!("Successfully grabbed article {:?}", self.metadata.title);
return Ok(()); return Ok(());
} }
} }