Add logging configured to send to a file by default
This commit is contained in:
parent
c0323a6ae4
commit
910c45abf7
7 changed files with 116 additions and 30 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,2 +1,3 @@
|
||||||
/target
|
/target
|
||||||
*.epub
|
*.epub
|
||||||
|
*.log
|
85
Cargo.lock
generated
85
Cargo.lock
generated
|
@ -150,7 +150,7 @@ dependencies = [
|
||||||
"fastrand",
|
"fastrand",
|
||||||
"futures-lite",
|
"futures-lite",
|
||||||
"libc",
|
"libc",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"nb-connect",
|
"nb-connect",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking",
|
"parking",
|
||||||
|
@ -195,7 +195,7 @@ dependencies = [
|
||||||
"futures-lite",
|
"futures-lite",
|
||||||
"gloo-timers",
|
"gloo-timers",
|
||||||
"kv-log-macro",
|
"kv-log-macro",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"memchr",
|
"memchr",
|
||||||
"num_cpus",
|
"num_cpus",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
@ -553,6 +553,16 @@ dependencies = [
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ctor"
|
||||||
|
version = "0.1.16"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7fbaabec2c953050352311293be5c6aba8e141ba19d6811862b232d6fd020484"
|
||||||
|
dependencies = [
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ctr"
|
name = "ctr"
|
||||||
version = "0.6.0"
|
version = "0.6.0"
|
||||||
|
@ -719,6 +729,22 @@ dependencies = [
|
||||||
"miniz_oxide 0.3.7",
|
"miniz_oxide 0.3.7",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "flexi_logger"
|
||||||
|
version = "0.17.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "33ab94b6ac8eb69f1496a6993f26f785b5fd6d99b7416023eb2a6175c0b242b1"
|
||||||
|
dependencies = [
|
||||||
|
"atty",
|
||||||
|
"chrono",
|
||||||
|
"glob",
|
||||||
|
"lazy_static",
|
||||||
|
"log 0.4.14",
|
||||||
|
"regex",
|
||||||
|
"thiserror",
|
||||||
|
"yansi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "flume"
|
name = "flume"
|
||||||
version = "0.9.2"
|
version = "0.9.2"
|
||||||
|
@ -914,6 +940,12 @@ version = "0.23.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce"
|
checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "glob"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "gloo-timers"
|
name = "gloo-timers"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
|
@ -980,7 +1012,7 @@ version = "0.25.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b"
|
checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"mac",
|
"mac",
|
||||||
"markup5ever",
|
"markup5ever",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
|
@ -1011,7 +1043,7 @@ dependencies = [
|
||||||
"dashmap",
|
"dashmap",
|
||||||
"http-types",
|
"http-types",
|
||||||
"isahc",
|
"isahc",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1087,7 +1119,7 @@ dependencies = [
|
||||||
"flume",
|
"flume",
|
||||||
"futures-lite",
|
"futures-lite",
|
||||||
"http",
|
"http",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"slab",
|
"slab",
|
||||||
"sluice",
|
"sluice",
|
||||||
|
@ -1130,7 +1162,7 @@ version = "1.0.7"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f"
|
checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1182,16 +1214,17 @@ version = "0.3.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
|
checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "log"
|
name = "log"
|
||||||
version = "0.4.11"
|
version = "0.4.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
|
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if 0.1.10",
|
"cfg-if 1.0.0",
|
||||||
|
"value-bag",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1206,7 +1239,7 @@ version = "0.10.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab"
|
checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"phf",
|
"phf",
|
||||||
"phf_codegen",
|
"phf_codegen",
|
||||||
"serde",
|
"serde",
|
||||||
|
@ -1277,7 +1310,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956"
|
checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"miow",
|
"miow",
|
||||||
"ntapi",
|
"ntapi",
|
||||||
"winapi",
|
"winapi",
|
||||||
|
@ -1410,14 +1443,17 @@ name = "paperoni"
|
||||||
version = "0.3.0-alpha1"
|
version = "0.3.0-alpha1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-std",
|
"async-std",
|
||||||
|
"atty",
|
||||||
"clap",
|
"clap",
|
||||||
"comfy-table",
|
"comfy-table",
|
||||||
"epub-builder",
|
"epub-builder",
|
||||||
|
"flexi_logger",
|
||||||
"futures",
|
"futures",
|
||||||
"html5ever",
|
"html5ever",
|
||||||
"indicatif",
|
"indicatif",
|
||||||
"kuchiki",
|
"kuchiki",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
|
"log 0.4.14",
|
||||||
"md5",
|
"md5",
|
||||||
"regex",
|
"regex",
|
||||||
"surf",
|
"surf",
|
||||||
|
@ -1568,7 +1604,7 @@ checksum = "a2a7bc6b2a29e632e45451c941832803a18cce6781db04de8a04696cdca8bde4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if 0.1.10",
|
"cfg-if 0.1.10",
|
||||||
"libc",
|
"libc",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"wepoll-sys",
|
"wepoll-sys",
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
@ -1801,7 +1837,7 @@ dependencies = [
|
||||||
"cssparser",
|
"cssparser",
|
||||||
"derive_more",
|
"derive_more",
|
||||||
"fxhash",
|
"fxhash",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"matches",
|
"matches",
|
||||||
"phf",
|
"phf",
|
||||||
"phf_codegen",
|
"phf_codegen",
|
||||||
|
@ -2112,7 +2148,7 @@ dependencies = [
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"http-client",
|
"http-client",
|
||||||
"http-types",
|
"http-types",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"mime_guess",
|
"mime_guess",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"pin-project-lite 0.2.4",
|
"pin-project-lite 0.2.4",
|
||||||
|
@ -2269,7 +2305,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b0987850db3733619253fe60e17cb59b82d37c7e6c0236bb81e4d6b87c879f27"
|
checksum = "b0987850db3733619253fe60e17cb59b82d37c7e6c0236bb81e4d6b87c879f27"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if 0.1.10",
|
"cfg-if 0.1.10",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"pin-project-lite 0.1.11",
|
"pin-project-lite 0.1.11",
|
||||||
"tracing-attributes",
|
"tracing-attributes",
|
||||||
"tracing-core",
|
"tracing-core",
|
||||||
|
@ -2400,6 +2436,15 @@ dependencies = [
|
||||||
"rand 0.7.3",
|
"rand 0.7.3",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "value-bag"
|
||||||
|
version = "1.0.0-alpha.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6b676010e055c99033117c2343b33a40a30b91fecd6c49055ac9cd2d6c305ab1"
|
||||||
|
dependencies = [
|
||||||
|
"ctor",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "vcpkg"
|
name = "vcpkg"
|
||||||
version = "0.2.10"
|
version = "0.2.10"
|
||||||
|
@ -2460,7 +2505,7 @@ checksum = "f22b422e2a757c35a73774860af8e112bff612ce6cb604224e8e47641a9e4f68"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bumpalo",
|
"bumpalo",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn",
|
||||||
|
@ -2549,6 +2594,12 @@ version = "0.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "yansi"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9fc79f4a1e39857fc00c3f662cbf2651c771f00e9c15fe2abc341806bd46bd71"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zip"
|
name = "zip"
|
||||||
version = "0.5.8"
|
version = "0.5.8"
|
||||||
|
|
|
@ -13,14 +13,17 @@ readme = "README.md"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
async-std = "1.9.0"
|
async-std = "1.9.0"
|
||||||
|
atty = "0.2.14"
|
||||||
clap = "2.33.3"
|
clap = "2.33.3"
|
||||||
comfy-table = "2.1.0"
|
comfy-table = "2.1.0"
|
||||||
epub-builder = "0.4.8"
|
epub-builder = "0.4.8"
|
||||||
|
flexi_logger = "0.17.1"
|
||||||
futures = "0.3.14"
|
futures = "0.3.14"
|
||||||
html5ever = "0.25.1"
|
html5ever = "0.25.1"
|
||||||
indicatif = "0.15.0"
|
indicatif = "0.15.0"
|
||||||
kuchiki = "0.8.1"
|
kuchiki = "0.8.1"
|
||||||
lazy_static = "1.4.0"
|
lazy_static = "1.4.0"
|
||||||
|
log = "0.4.14"
|
||||||
md5 = "0.7.0"
|
md5 = "0.7.0"
|
||||||
regex = "1.4.5"
|
regex = "1.4.5"
|
||||||
surf = "2.2.0"
|
surf = "2.2.0"
|
||||||
|
|
13
src/epub.rs
13
src/epub.rs
|
@ -3,6 +3,7 @@ use std::fs::File;
|
||||||
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
|
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
|
||||||
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
|
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
|
||||||
use indicatif::{ProgressBar, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressStyle};
|
||||||
|
use log::{debug, info};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
errors::PaperoniError,
|
errors::PaperoniError,
|
||||||
|
@ -19,7 +20,9 @@ pub fn generate_epubs(
|
||||||
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} epub {pos}/{len:7} {msg:.green}",
|
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} epub {pos}/{len:7} {msg:.green}",
|
||||||
);
|
);
|
||||||
bar.set_style(style);
|
bar.set_style(style);
|
||||||
bar.set_message("Generating epubs");
|
if !articles.is_empty() {
|
||||||
|
bar.set_message("Generating epubs");
|
||||||
|
}
|
||||||
|
|
||||||
let mut errors: Vec<PaperoniError> = Vec::new();
|
let mut errors: Vec<PaperoniError> = Vec::new();
|
||||||
|
|
||||||
|
@ -47,6 +50,7 @@ pub fn generate_epubs(
|
||||||
return Err(errors);
|
return Err(errors);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
debug!("Creating {:?}", name);
|
||||||
epub.inline_toc();
|
epub.inline_toc();
|
||||||
articles
|
articles
|
||||||
.iter()
|
.iter()
|
||||||
|
@ -62,7 +66,7 @@ pub fn generate_epubs(
|
||||||
EpubContent::new(format!("article_{}.xhtml", idx), html_str.as_bytes())
|
EpubContent::new(format!("article_{}.xhtml", idx), html_str.as_bytes())
|
||||||
.title(replace_metadata_value(section_name)),
|
.title(replace_metadata_value(section_name)),
|
||||||
)?;
|
)?;
|
||||||
|
info!("Adding images for {:?}", name);
|
||||||
article.img_urls.iter().for_each(|img| {
|
article.img_urls.iter().for_each(|img| {
|
||||||
// TODO: Add error handling and return errors as a vec
|
// TODO: Add error handling and return errors as a vec
|
||||||
let mut file_path = std::env::temp_dir();
|
let mut file_path = std::env::temp_dir();
|
||||||
|
@ -76,6 +80,7 @@ pub fn generate_epubs(
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
});
|
});
|
||||||
|
info!("Added images for {:?}", name);
|
||||||
Ok(())
|
Ok(())
|
||||||
};
|
};
|
||||||
if let Err(mut error) = article_result() {
|
if let Err(mut error) = article_result() {
|
||||||
|
@ -98,6 +103,7 @@ pub fn generate_epubs(
|
||||||
}
|
}
|
||||||
|
|
||||||
bar.finish_with_message("Generated epub\n");
|
bar.finish_with_message("Generated epub\n");
|
||||||
|
debug!("Created {:?}", name);
|
||||||
println!("Created {:?}", name);
|
println!("Created {:?}", name);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
@ -119,6 +125,7 @@ pub fn generate_epubs(
|
||||||
.replace("/", " ")
|
.replace("/", " ")
|
||||||
.replace("\\", " ")
|
.replace("\\", " ")
|
||||||
);
|
);
|
||||||
|
debug!("Creating {:?}", file_name);
|
||||||
let mut out_file = File::create(&file_name).unwrap();
|
let mut out_file = File::create(&file_name).unwrap();
|
||||||
let mut html_buf = Vec::new();
|
let mut html_buf = Vec::new();
|
||||||
extractor::serialize_to_xhtml(article.article(), &mut html_buf)
|
extractor::serialize_to_xhtml(article.article(), &mut html_buf)
|
||||||
|
@ -145,7 +152,7 @@ pub fn generate_epubs(
|
||||||
|
|
||||||
successful_articles_table.add_row(vec![article.metadata().title()]);
|
successful_articles_table.add_row(vec![article.metadata().title()]);
|
||||||
|
|
||||||
// println!("Created {:?}", file_name);
|
debug!("Created {:?}", file_name);
|
||||||
Ok(())
|
Ok(())
|
||||||
};
|
};
|
||||||
if let Err(mut error) = result() {
|
if let Err(mut error) = result() {
|
||||||
|
|
23
src/http.rs
23
src/http.rs
|
@ -2,6 +2,7 @@ use async_std::io::prelude::*;
|
||||||
use async_std::{fs::File, stream};
|
use async_std::{fs::File, stream};
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use indicatif::ProgressBar;
|
use indicatif::ProgressBar;
|
||||||
|
use log::{debug, info};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use crate::{errors::ErrorKind, errors::PaperoniError, extractor::Extractor};
|
use crate::{errors::ErrorKind, errors::PaperoniError, extractor::Extractor};
|
||||||
|
@ -10,7 +11,7 @@ type HTMLResource = (String, String);
|
||||||
|
|
||||||
pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
|
pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
|
||||||
let client = surf::Client::new();
|
let client = surf::Client::new();
|
||||||
// println!("Fetching...");
|
debug!("Fetching {}", url);
|
||||||
|
|
||||||
let process_request = async {
|
let process_request = async {
|
||||||
let mut redirect_count: u8 = 0;
|
let mut redirect_count: u8 = 0;
|
||||||
|
@ -23,10 +24,19 @@ pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
|
||||||
if res.status().is_redirection() {
|
if res.status().is_redirection() {
|
||||||
if let Some(location) = res.header(surf::http::headers::LOCATION) {
|
if let Some(location) = res.header(surf::http::headers::LOCATION) {
|
||||||
match Url::parse(location.last().as_str()) {
|
match Url::parse(location.last().as_str()) {
|
||||||
Ok(valid_url) => url = valid_url,
|
Ok(valid_url) => {
|
||||||
|
info!("Redirecting {} to {}", url, valid_url);
|
||||||
|
url = valid_url
|
||||||
|
}
|
||||||
Err(e) => match e {
|
Err(e) => match e {
|
||||||
url::ParseError::RelativeUrlWithoutBase => {
|
url::ParseError::RelativeUrlWithoutBase => {
|
||||||
url = base_url.join(location.last().as_str())?
|
match base_url.join(location.last().as_str()) {
|
||||||
|
Ok(joined_url) => {
|
||||||
|
info!("Redirecting {} to {}", url, joined_url);
|
||||||
|
url = joined_url;
|
||||||
|
}
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
e => return Err(e.into()),
|
e => return Err(e.into()),
|
||||||
},
|
},
|
||||||
|
@ -35,6 +45,7 @@ pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
|
||||||
} else if res.status().is_success() {
|
} else if res.status().is_success() {
|
||||||
if let Some(mime) = res.content_type() {
|
if let Some(mime) = res.content_type() {
|
||||||
if mime.essence() == "text/html" {
|
if mime.essence() == "text/html" {
|
||||||
|
debug!("Successfully fetched {}", url);
|
||||||
return Ok((url.to_string(), res.body_string().await?));
|
return Ok((url.to_string(), res.body_string().await?));
|
||||||
} else {
|
} else {
|
||||||
let msg = format!(
|
let msg = format!(
|
||||||
|
@ -67,7 +78,11 @@ pub async fn download_images(
|
||||||
bar: &ProgressBar,
|
bar: &ProgressBar,
|
||||||
) -> Result<(), Vec<PaperoniError>> {
|
) -> Result<(), Vec<PaperoniError>> {
|
||||||
if extractor.img_urls.len() > 0 {
|
if extractor.img_urls.len() > 0 {
|
||||||
// println!("Downloading images...");
|
debug!(
|
||||||
|
"Downloading {} images for {}",
|
||||||
|
extractor.img_urls.len(),
|
||||||
|
article_origin
|
||||||
|
);
|
||||||
}
|
}
|
||||||
let img_count = extractor.img_urls.len();
|
let img_count = extractor.img_urls.len();
|
||||||
|
|
||||||
|
|
13
src/main.rs
13
src/main.rs
|
@ -7,6 +7,7 @@ use comfy_table::presets::{UTF8_FULL, UTF8_HORIZONTAL_BORDERS_ONLY};
|
||||||
use comfy_table::{Attribute, Cell, CellAlignment, ContentArrangement, Table};
|
use comfy_table::{Attribute, Cell, CellAlignment, ContentArrangement, Table};
|
||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
use indicatif::{ProgressBar, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressStyle};
|
||||||
|
use log::{debug, warn};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
mod cli;
|
mod cli;
|
||||||
|
@ -27,6 +28,13 @@ fn main() {
|
||||||
let app_config = cli::cli_init();
|
let app_config = cli::cli_init();
|
||||||
|
|
||||||
if !app_config.urls().is_empty() {
|
if !app_config.urls().is_empty() {
|
||||||
|
match flexi_logger::Logger::with_str("paperoni=debug")
|
||||||
|
.log_to_file()
|
||||||
|
.start()
|
||||||
|
{
|
||||||
|
Ok(_) => (),
|
||||||
|
Err(e) => eprintln!("Unable to start logger!\n{}", e),
|
||||||
|
}
|
||||||
download(app_config);
|
download(app_config);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -46,7 +54,7 @@ fn download(app_config: AppConfig) {
|
||||||
while let Some(fetch_result) = responses.next().await {
|
while let Some(fetch_result) = responses.next().await {
|
||||||
match fetch_result {
|
match fetch_result {
|
||||||
Ok((url, html)) => {
|
Ok((url, html)) => {
|
||||||
// println!("Extracting");
|
debug!("Extracting {}", &url);
|
||||||
let mut extractor = Extractor::from_html(&html, &url);
|
let mut extractor = Extractor::from_html(&html, &url);
|
||||||
bar.set_message("Extracting...");
|
bar.set_message("Extracting...");
|
||||||
match extractor.extract_content() {
|
match extractor.extract_content() {
|
||||||
|
@ -56,7 +64,7 @@ fn download(app_config: AppConfig) {
|
||||||
download_images(&mut extractor, &Url::parse(&url).unwrap(), &bar)
|
download_images(&mut extractor, &Url::parse(&url).unwrap(), &bar)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
eprintln!(
|
warn!(
|
||||||
"{} image{} failed to download for {}",
|
"{} image{} failed to download for {}",
|
||||||
img_errors.len(),
|
img_errors.len(),
|
||||||
if img_errors.len() > 1 { "s" } else { "" },
|
if img_errors.len() > 1 { "s" } else { "" },
|
||||||
|
@ -78,6 +86,7 @@ fn download(app_config: AppConfig) {
|
||||||
articles
|
articles
|
||||||
});
|
});
|
||||||
bar.finish_with_message("Downloaded articles");
|
bar.finish_with_message("Downloaded articles");
|
||||||
|
|
||||||
let mut succesful_articles_table = Table::new();
|
let mut succesful_articles_table = Table::new();
|
||||||
succesful_articles_table
|
succesful_articles_table
|
||||||
.load_preset(UTF8_FULL)
|
.load_preset(UTF8_FULL)
|
||||||
|
|
|
@ -7,6 +7,7 @@ use kuchiki::{
|
||||||
traits::*,
|
traits::*,
|
||||||
NodeData, NodeRef,
|
NodeData, NodeRef,
|
||||||
};
|
};
|
||||||
|
use log::info;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use crate::errors::{ErrorKind, PaperoniError};
|
use crate::errors::{ErrorKind, PaperoniError};
|
||||||
|
@ -1587,14 +1588,12 @@ impl Readability {
|
||||||
/// Using a variety of metrics (content score, classname, element types), find the content that is most likely to be the stuff
|
/// Using a variety of metrics (content score, classname, element types), find the content that is most likely to be the stuff
|
||||||
/// a user wants to read. Then return it wrapped up in a div.
|
/// a user wants to read. Then return it wrapped up in a div.
|
||||||
fn grab_article(&mut self) -> Result<(), PaperoniError> {
|
fn grab_article(&mut self) -> Result<(), PaperoniError> {
|
||||||
// TODO: Add logging for this
|
info!("Grabbing article {:?}", self.metadata.title);
|
||||||
// println!("Grabbing article");
|
|
||||||
// var doc = this._doc;
|
// var doc = this._doc;
|
||||||
// var isPaging = (page !== null ? true: false);
|
// var isPaging = (page !== null ? true: false);
|
||||||
// page = page ? page : this._doc.body;
|
// page = page ? page : this._doc.body;
|
||||||
let page = self.root_node.select_first("body");
|
let page = self.root_node.select_first("body");
|
||||||
if page.is_err() {
|
if page.is_err() {
|
||||||
// TODO:Have error logging for this
|
|
||||||
return Err(ErrorKind::ReadabilityError("Document has no <body>".into()).into());
|
return Err(ErrorKind::ReadabilityError("Document has no <body>".into()).into());
|
||||||
}
|
}
|
||||||
let page = page.unwrap();
|
let page = page.unwrap();
|
||||||
|
@ -2114,6 +2113,7 @@ impl Readability {
|
||||||
false
|
false
|
||||||
});
|
});
|
||||||
self.article_node = Some(article_content);
|
self.article_node = Some(article_content);
|
||||||
|
info!("Successfully grabbed article {:?}", self.metadata.title);
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Reference in a new issue