Add printing of tables upon successful extraction

This commit is contained in:
Kenneth Gitere 2021-04-20 14:02:56 +03:00
parent 04a1eed4e2
commit b217448601
4 changed files with 193 additions and 9 deletions

165
Cargo.lock generated
View file

@ -394,6 +394,17 @@ dependencies = [
"vec_map", "vec_map",
] ]
[[package]]
name = "comfy-table"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17b99e9022e080d384b58d8eaf5976b42a311ff7a9669f8200eb2453c0b2b81a"
dependencies = [
"crossterm",
"strum",
"strum_macros",
]
[[package]] [[package]]
name = "concurrent-queue" name = "concurrent-queue"
version = "1.2.2" version = "1.2.2"
@ -468,6 +479,31 @@ dependencies = [
"lazy_static", "lazy_static",
] ]
[[package]]
name = "crossterm"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c36c10130df424b2f3552fcc2ddcd9b28a27b1e54b358b45874f88d1ca6888c"
dependencies = [
"bitflags",
"crossterm_winapi",
"lazy_static",
"libc",
"mio",
"parking_lot",
"signal-hook",
"winapi",
]
[[package]]
name = "crossterm_winapi"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0da8964ace4d3e4a044fd027919b2237000b24315a37c916f61809f1ff2140b9"
dependencies = [
"winapi",
]
[[package]] [[package]]
name = "crypto-mac" name = "crypto-mac"
version = "0.10.0" version = "0.10.0"
@ -872,6 +908,15 @@ dependencies = [
"web-sys", "web-sys",
] ]
[[package]]
name = "heck"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac"
dependencies = [
"unicode-segmentation",
]
[[package]] [[package]]
name = "hermit-abi" name = "hermit-abi"
version = "0.1.17" version = "0.1.17"
@ -1075,9 +1120,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.80" version = "0.2.93"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614" checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
[[package]] [[package]]
name = "libnghttp2-sys" name = "libnghttp2-sys"
@ -1204,6 +1249,28 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "mio"
version = "0.7.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956"
dependencies = [
"libc",
"log 0.4.11",
"miow",
"ntapi",
"winapi",
]
[[package]]
name = "miow"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21"
dependencies = [
"winapi",
]
[[package]] [[package]]
name = "mustache" name = "mustache"
version = "0.9.0" version = "0.9.0"
@ -1236,6 +1303,15 @@ version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
[[package]]
name = "ntapi"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44"
dependencies = [
"winapi",
]
[[package]] [[package]]
name = "num-integer" name = "num-integer"
version = "0.1.44" version = "0.1.44"
@ -1314,6 +1390,7 @@ version = "0.3.0-alpha1"
dependencies = [ dependencies = [
"async-std", "async-std",
"clap", "clap",
"comfy-table",
"epub-builder", "epub-builder",
"futures", "futures",
"html5ever", "html5ever",
@ -1333,6 +1410,31 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72" checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72"
[[package]]
name = "parking_lot"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb"
dependencies = [
"instant",
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018"
dependencies = [
"cfg-if 1.0.0",
"instant",
"libc",
"redox_syscall 0.2.6",
"smallvec",
"winapi",
]
[[package]] [[package]]
name = "percent-encoding" name = "percent-encoding"
version = "2.1.0" version = "2.1.0"
@ -1596,6 +1698,15 @@ version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
[[package]]
name = "redox_syscall"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8270314b5ccceb518e7e578952f0b72b88222d02e8f77f5ecf7abbb673539041"
dependencies = [
"bitflags",
]
[[package]] [[package]]
name = "regex" name = "regex"
version = "1.4.2" version = "1.4.2"
@ -1779,6 +1890,26 @@ dependencies = [
"opaque-debug", "opaque-debug",
] ]
[[package]]
name = "signal-hook"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e31d442c16f047a671b5a71e2161d6e68814012b7f5379d269ebd915fac2729"
dependencies = [
"libc",
"mio",
"signal-hook-registry",
]
[[package]]
name = "signal-hook-registry"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "siphasher" name = "siphasher"
version = "0.3.3" version = "0.3.3"
@ -1804,9 +1935,9 @@ dependencies = [
[[package]] [[package]]
name = "smallvec" name = "smallvec"
version = "1.5.0" version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7acad6f34eb9e8a259d3283d1e8c1d34d7415943d4895f65cc73813c7396fc85" checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
[[package]] [[package]]
name = "socket2" name = "socket2"
@ -1816,7 +1947,7 @@ checksum = "2c29947abdee2a218277abeca306f25789c938e500ea5a9d4b12a5a504466902"
dependencies = [ dependencies = [
"cfg-if 1.0.0", "cfg-if 1.0.0",
"libc", "libc",
"redox_syscall", "redox_syscall 0.1.57",
"winapi", "winapi",
] ]
@ -1924,6 +2055,24 @@ version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "strum"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7318c509b5ba57f18533982607f24070a55d353e90d4cae30c467cdb2ad5ac5c"
[[package]]
name = "strum_macros"
version = "0.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee8bc6b87a5112aeeab1f4a9f7ab634fe6cbefc4850006df31267f4cfb9e3149"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "subtle" name = "subtle"
version = "2.3.0" version = "2.3.0"
@ -2178,6 +2327,12 @@ dependencies = [
"tinyvec", "tinyvec",
] ]
[[package]]
name = "unicode-segmentation"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796"
[[package]] [[package]]
name = "unicode-width" name = "unicode-width"
version = "0.1.8" version = "0.1.8"

View file

@ -14,6 +14,7 @@ readme = "README.md"
[dependencies] [dependencies]
async-std = "1.7.0" async-std = "1.7.0"
clap = "2.33.3" clap = "2.33.3"
comfy-table = "2.1.0"
epub-builder = "0.4.8" epub-builder = "0.4.8"
futures = "0.3.12" futures = "0.3.12"
html5ever = "0.25.1" html5ever = "0.25.1"

View file

@ -1,5 +1,7 @@
use std::fs::File; use std::fs::File;
use comfy_table::presets::{UTF8_FULL, UTF8_HORIZONTAL_BORDERS_ONLY};
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary}; use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
use indicatif::{ProgressBar, ProgressStyle}; use indicatif::{ProgressBar, ProgressStyle};
@ -18,8 +20,17 @@ pub fn generate_epubs(
); );
bar.set_style(style); bar.set_style(style);
bar.set_message("Generating epubs"); bar.set_message("Generating epubs");
let mut base_table = Table::new();
base_table
.load_preset(UTF8_FULL)
.load_preset(UTF8_HORIZONTAL_BORDERS_ONLY)
.set_content_arrangement(ContentArrangement::Dynamic);
match merged { match merged {
Some(name) => { Some(name) => {
base_table.set_header(vec![Cell::new("Table of Contents")
.add_attribute(Attribute::Bold)
.set_alignment(CellAlignment::Center)
.fg(Color::Green)]);
let mut epub = EpubBuilder::new(ZipLibrary::new()?)?; let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;
epub.inline_toc(); epub.inline_toc();
epub = articles epub = articles
@ -41,7 +52,6 @@ pub fn generate_epubs(
article.img_urls.iter().for_each(|img| { article.img_urls.iter().for_each(|img| {
// TODO: Add error handling // TODO: Add error handling
bar.inc(1);
let mut file_path = std::env::temp_dir(); let mut file_path = std::env::temp_dir();
file_path.push(&img.0); file_path.push(&img.0);
@ -53,6 +63,8 @@ pub fn generate_epubs(
) )
.unwrap(); .unwrap();
}); });
bar.inc(1);
base_table.add_row(vec![article.metadata().title()]);
epub epub
}); });
let mut out_file = File::create(&name).unwrap(); let mut out_file = File::create(&name).unwrap();
@ -61,6 +73,13 @@ pub fn generate_epubs(
println!("Created {:?}", name); println!("Created {:?}", name);
} }
None => { None => {
base_table
.set_header(vec![Cell::new("Downloaded articles")
.add_attribute(Attribute::Bold)
.set_alignment(CellAlignment::Center)
.fg(Color::Green)])
.set_content_arrangement(ContentArrangement::Dynamic);
for article in articles { for article in articles {
let mut epub = EpubBuilder::new(ZipLibrary::new()?)?; let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;
let file_name = format!( let file_name = format!(
@ -81,20 +100,28 @@ pub fn generate_epubs(
} }
epub.metadata("title", replace_metadata_value(article.metadata().title()))?; epub.metadata("title", replace_metadata_value(article.metadata().title()))?;
epub.add_content(EpubContent::new("index.xhtml", html_str.as_bytes()))?; epub.add_content(EpubContent::new("index.xhtml", html_str.as_bytes()))?;
for img in article.img_urls { for img in &article.img_urls {
let mut file_path = std::env::temp_dir(); let mut file_path = std::env::temp_dir();
file_path.push(&img.0); file_path.push(&img.0);
let img_buf = File::open(&file_path).expect("Can't read file"); let img_buf = File::open(&file_path).expect("Can't read file");
epub.add_resource(file_path.file_name().unwrap(), img_buf, img.1.unwrap())?; epub.add_resource(
file_path.file_name().unwrap(),
img_buf,
img.1.as_ref().unwrap(),
)?;
} }
epub.generate(&mut out_file)?; epub.generate(&mut out_file)?;
bar.inc(1); bar.inc(1);
base_table.add_row(vec![article.metadata().title()]);
// println!("Created {:?}", file_name); // println!("Created {:?}", file_name);
} }
bar.finish_with_message("Generated epubs\n"); bar.finish_with_message("Generated epubs\n");
} }
} }
println!("{}", base_table);
Ok(()) Ok(())
} }

View file

@ -1585,7 +1585,8 @@ impl Readability {
/// Using a variety of metrics (content score, classname, element types), find the content that is most likely to be the stuff /// Using a variety of metrics (content score, classname, element types), find the content that is most likely to be the stuff
/// a user wants to read. Then return it wrapped up in a div. /// a user wants to read. Then return it wrapped up in a div.
fn grab_article(&mut self) { fn grab_article(&mut self) {
println!("Grabbing article"); // TODO: Add logging for this
// println!("Grabbing article");
// var doc = this._doc; // var doc = this._doc;
// var isPaging = (page !== null ? true: false); // var isPaging = (page !== null ? true: false);
// page = page ? page : this._doc.body; // page = page ? page : this._doc.body;