Add logging configured to send to a file by default

This commit is contained in:
Kenneth Gitere 2021-04-24 13:54:47 +03:00
parent c0323a6ae4
commit 910c45abf7
7 changed files with 116 additions and 30 deletions

1
.gitignore vendored
View file

@ -1,2 +1,3 @@
/target
*.epub
*.log

85
Cargo.lock generated
View file

@ -150,7 +150,7 @@ dependencies = [
"fastrand",
"futures-lite",
"libc",
"log 0.4.11",
"log 0.4.14",
"nb-connect",
"once_cell",
"parking",
@ -195,7 +195,7 @@ dependencies = [
"futures-lite",
"gloo-timers",
"kv-log-macro",
"log 0.4.11",
"log 0.4.14",
"memchr",
"num_cpus",
"once_cell",
@ -553,6 +553,16 @@ dependencies = [
"syn",
]
[[package]]
name = "ctor"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fbaabec2c953050352311293be5c6aba8e141ba19d6811862b232d6fd020484"
dependencies = [
"quote",
"syn",
]
[[package]]
name = "ctr"
version = "0.6.0"
@ -719,6 +729,22 @@ dependencies = [
"miniz_oxide 0.3.7",
]
[[package]]
name = "flexi_logger"
version = "0.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33ab94b6ac8eb69f1496a6993f26f785b5fd6d99b7416023eb2a6175c0b242b1"
dependencies = [
"atty",
"chrono",
"glob",
"lazy_static",
"log 0.4.14",
"regex",
"thiserror",
"yansi",
]
[[package]]
name = "flume"
version = "0.9.2"
@ -914,6 +940,12 @@ version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce"
[[package]]
name = "glob"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
name = "gloo-timers"
version = "0.2.1"
@ -980,7 +1012,7 @@ version = "0.25.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b"
dependencies = [
"log 0.4.11",
"log 0.4.14",
"mac",
"markup5ever",
"proc-macro2",
@ -1011,7 +1043,7 @@ dependencies = [
"dashmap",
"http-types",
"isahc",
"log 0.4.11",
"log 0.4.14",
]
[[package]]
@ -1087,7 +1119,7 @@ dependencies = [
"flume",
"futures-lite",
"http",
"log 0.4.11",
"log 0.4.14",
"once_cell",
"slab",
"sluice",
@ -1130,7 +1162,7 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f"
dependencies = [
"log 0.4.11",
"log 0.4.14",
]
[[package]]
@ -1182,16 +1214,17 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
dependencies = [
"log 0.4.11",
"log 0.4.14",
]
[[package]]
name = "log"
version = "0.4.11"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
dependencies = [
"cfg-if 0.1.10",
"cfg-if 1.0.0",
"value-bag",
]
[[package]]
@ -1206,7 +1239,7 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab"
dependencies = [
"log 0.4.11",
"log 0.4.14",
"phf",
"phf_codegen",
"serde",
@ -1277,7 +1310,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956"
dependencies = [
"libc",
"log 0.4.11",
"log 0.4.14",
"miow",
"ntapi",
"winapi",
@ -1410,14 +1443,17 @@ name = "paperoni"
version = "0.3.0-alpha1"
dependencies = [
"async-std",
"atty",
"clap",
"comfy-table",
"epub-builder",
"flexi_logger",
"futures",
"html5ever",
"indicatif",
"kuchiki",
"lazy_static",
"log 0.4.14",
"md5",
"regex",
"surf",
@ -1568,7 +1604,7 @@ checksum = "a2a7bc6b2a29e632e45451c941832803a18cce6781db04de8a04696cdca8bde4"
dependencies = [
"cfg-if 0.1.10",
"libc",
"log 0.4.11",
"log 0.4.14",
"wepoll-sys",
"winapi",
]
@ -1801,7 +1837,7 @@ dependencies = [
"cssparser",
"derive_more",
"fxhash",
"log 0.4.11",
"log 0.4.14",
"matches",
"phf",
"phf_codegen",
@ -2112,7 +2148,7 @@ dependencies = [
"futures-util",
"http-client",
"http-types",
"log 0.4.11",
"log 0.4.14",
"mime_guess",
"once_cell",
"pin-project-lite 0.2.4",
@ -2269,7 +2305,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0987850db3733619253fe60e17cb59b82d37c7e6c0236bb81e4d6b87c879f27"
dependencies = [
"cfg-if 0.1.10",
"log 0.4.11",
"log 0.4.14",
"pin-project-lite 0.1.11",
"tracing-attributes",
"tracing-core",
@ -2400,6 +2436,15 @@ dependencies = [
"rand 0.7.3",
]
[[package]]
name = "value-bag"
version = "1.0.0-alpha.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b676010e055c99033117c2343b33a40a30b91fecd6c49055ac9cd2d6c305ab1"
dependencies = [
"ctor",
]
[[package]]
name = "vcpkg"
version = "0.2.10"
@ -2460,7 +2505,7 @@ checksum = "f22b422e2a757c35a73774860af8e112bff612ce6cb604224e8e47641a9e4f68"
dependencies = [
"bumpalo",
"lazy_static",
"log 0.4.11",
"log 0.4.14",
"proc-macro2",
"quote",
"syn",
@ -2549,6 +2594,12 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "yansi"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fc79f4a1e39857fc00c3f662cbf2651c771f00e9c15fe2abc341806bd46bd71"
[[package]]
name = "zip"
version = "0.5.8"

View file

@ -13,14 +13,17 @@ readme = "README.md"
[dependencies]
async-std = "1.9.0"
atty = "0.2.14"
clap = "2.33.3"
comfy-table = "2.1.0"
epub-builder = "0.4.8"
flexi_logger = "0.17.1"
futures = "0.3.14"
html5ever = "0.25.1"
indicatif = "0.15.0"
kuchiki = "0.8.1"
lazy_static = "1.4.0"
log = "0.4.14"
md5 = "0.7.0"
regex = "1.4.5"
surf = "2.2.0"

View file

@ -3,6 +3,7 @@ use std::fs::File;
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
use indicatif::{ProgressBar, ProgressStyle};
use log::{debug, info};
use crate::{
errors::PaperoniError,
@ -19,7 +20,9 @@ pub fn generate_epubs(
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} epub {pos}/{len:7} {msg:.green}",
);
bar.set_style(style);
bar.set_message("Generating epubs");
if !articles.is_empty() {
bar.set_message("Generating epubs");
}
let mut errors: Vec<PaperoniError> = Vec::new();
@ -47,6 +50,7 @@ pub fn generate_epubs(
return Err(errors);
}
};
debug!("Creating {:?}", name);
epub.inline_toc();
articles
.iter()
@ -62,7 +66,7 @@ pub fn generate_epubs(
EpubContent::new(format!("article_{}.xhtml", idx), html_str.as_bytes())
.title(replace_metadata_value(section_name)),
)?;
info!("Adding images for {:?}", name);
article.img_urls.iter().for_each(|img| {
// TODO: Add error handling and return errors as a vec
let mut file_path = std::env::temp_dir();
@ -76,6 +80,7 @@ pub fn generate_epubs(
)
.unwrap();
});
info!("Added images for {:?}", name);
Ok(())
};
if let Err(mut error) = article_result() {
@ -98,6 +103,7 @@ pub fn generate_epubs(
}
bar.finish_with_message("Generated epub\n");
debug!("Created {:?}", name);
println!("Created {:?}", name);
}
None => {
@ -119,6 +125,7 @@ pub fn generate_epubs(
.replace("/", " ")
.replace("\\", " ")
);
debug!("Creating {:?}", file_name);
let mut out_file = File::create(&file_name).unwrap();
let mut html_buf = Vec::new();
extractor::serialize_to_xhtml(article.article(), &mut html_buf)
@ -145,7 +152,7 @@ pub fn generate_epubs(
successful_articles_table.add_row(vec![article.metadata().title()]);
// println!("Created {:?}", file_name);
debug!("Created {:?}", file_name);
Ok(())
};
if let Err(mut error) = result() {

View file

@ -2,6 +2,7 @@ use async_std::io::prelude::*;
use async_std::{fs::File, stream};
use futures::StreamExt;
use indicatif::ProgressBar;
use log::{debug, info};
use url::Url;
use crate::{errors::ErrorKind, errors::PaperoniError, extractor::Extractor};
@ -10,7 +11,7 @@ type HTMLResource = (String, String);
pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
let client = surf::Client::new();
// println!("Fetching...");
debug!("Fetching {}", url);
let process_request = async {
let mut redirect_count: u8 = 0;
@ -23,10 +24,19 @@ pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
if res.status().is_redirection() {
if let Some(location) = res.header(surf::http::headers::LOCATION) {
match Url::parse(location.last().as_str()) {
Ok(valid_url) => url = valid_url,
Ok(valid_url) => {
info!("Redirecting {} to {}", url, valid_url);
url = valid_url
}
Err(e) => match e {
url::ParseError::RelativeUrlWithoutBase => {
url = base_url.join(location.last().as_str())?
match base_url.join(location.last().as_str()) {
Ok(joined_url) => {
info!("Redirecting {} to {}", url, joined_url);
url = joined_url;
}
Err(e) => return Err(e.into()),
}
}
e => return Err(e.into()),
},
@ -35,6 +45,7 @@ pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
} else if res.status().is_success() {
if let Some(mime) = res.content_type() {
if mime.essence() == "text/html" {
debug!("Successfully fetched {}", url);
return Ok((url.to_string(), res.body_string().await?));
} else {
let msg = format!(
@ -67,7 +78,11 @@ pub async fn download_images(
bar: &ProgressBar,
) -> Result<(), Vec<PaperoniError>> {
if extractor.img_urls.len() > 0 {
// println!("Downloading images...");
debug!(
"Downloading {} images for {}",
extractor.img_urls.len(),
article_origin
);
}
let img_count = extractor.img_urls.len();

View file

@ -7,6 +7,7 @@ use comfy_table::presets::{UTF8_FULL, UTF8_HORIZONTAL_BORDERS_ONLY};
use comfy_table::{Attribute, Cell, CellAlignment, ContentArrangement, Table};
use futures::stream::StreamExt;
use indicatif::{ProgressBar, ProgressStyle};
use log::{debug, warn};
use url::Url;
mod cli;
@ -27,6 +28,13 @@ fn main() {
let app_config = cli::cli_init();
if !app_config.urls().is_empty() {
match flexi_logger::Logger::with_str("paperoni=debug")
.log_to_file()
.start()
{
Ok(_) => (),
Err(e) => eprintln!("Unable to start logger!\n{}", e),
}
download(app_config);
}
}
@ -46,7 +54,7 @@ fn download(app_config: AppConfig) {
while let Some(fetch_result) = responses.next().await {
match fetch_result {
Ok((url, html)) => {
// println!("Extracting");
debug!("Extracting {}", &url);
let mut extractor = Extractor::from_html(&html, &url);
bar.set_message("Extracting...");
match extractor.extract_content() {
@ -56,7 +64,7 @@ fn download(app_config: AppConfig) {
download_images(&mut extractor, &Url::parse(&url).unwrap(), &bar)
.await
{
eprintln!(
warn!(
"{} image{} failed to download for {}",
img_errors.len(),
if img_errors.len() > 1 { "s" } else { "" },
@ -78,6 +86,7 @@ fn download(app_config: AppConfig) {
articles
});
bar.finish_with_message("Downloaded articles");
let mut succesful_articles_table = Table::new();
succesful_articles_table
.load_preset(UTF8_FULL)

View file

@ -7,6 +7,7 @@ use kuchiki::{
traits::*,
NodeData, NodeRef,
};
use log::info;
use url::Url;
use crate::errors::{ErrorKind, PaperoniError};
@ -1587,14 +1588,12 @@ impl Readability {
/// Using a variety of metrics (content score, classname, element types), find the content that is most likely to be the stuff
/// a user wants to read. Then return it wrapped up in a div.
fn grab_article(&mut self) -> Result<(), PaperoniError> {
// TODO: Add logging for this
// println!("Grabbing article");
info!("Grabbing article {:?}", self.metadata.title);
// var doc = this._doc;
// var isPaging = (page !== null ? true: false);
// page = page ? page : this._doc.body;
let page = self.root_node.select_first("body");
if page.is_err() {
// TODO:Have error logging for this
return Err(ErrorKind::ReadabilityError("Document has no <body>".into()).into());
}
let page = page.unwrap();
@ -2114,6 +2113,7 @@ impl Readability {
false
});
self.article_node = Some(article_content);
info!("Successfully grabbed article {:?}", self.metadata.title);
return Ok(());
}
}