Add printing of tables upon successful extraction
This commit is contained in:
parent
04a1eed4e2
commit
b217448601
4 changed files with 193 additions and 9 deletions
165
Cargo.lock
generated
165
Cargo.lock
generated
|
@ -394,6 +394,17 @@ dependencies = [
|
||||||
"vec_map",
|
"vec_map",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "comfy-table"
|
||||||
|
version = "2.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "17b99e9022e080d384b58d8eaf5976b42a311ff7a9669f8200eb2453c0b2b81a"
|
||||||
|
dependencies = [
|
||||||
|
"crossterm",
|
||||||
|
"strum",
|
||||||
|
"strum_macros",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "concurrent-queue"
|
name = "concurrent-queue"
|
||||||
version = "1.2.2"
|
version = "1.2.2"
|
||||||
|
@ -468,6 +479,31 @@ dependencies = [
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossterm"
|
||||||
|
version = "0.19.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7c36c10130df424b2f3552fcc2ddcd9b28a27b1e54b358b45874f88d1ca6888c"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"crossterm_winapi",
|
||||||
|
"lazy_static",
|
||||||
|
"libc",
|
||||||
|
"mio",
|
||||||
|
"parking_lot",
|
||||||
|
"signal-hook",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossterm_winapi"
|
||||||
|
version = "0.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0da8964ace4d3e4a044fd027919b2237000b24315a37c916f61809f1ff2140b9"
|
||||||
|
dependencies = [
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crypto-mac"
|
name = "crypto-mac"
|
||||||
version = "0.10.0"
|
version = "0.10.0"
|
||||||
|
@ -872,6 +908,15 @@ dependencies = [
|
||||||
"web-sys",
|
"web-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heck"
|
||||||
|
version = "0.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-segmentation",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hermit-abi"
|
name = "hermit-abi"
|
||||||
version = "0.1.17"
|
version = "0.1.17"
|
||||||
|
@ -1075,9 +1120,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libc"
|
name = "libc"
|
||||||
version = "0.2.80"
|
version = "0.2.93"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614"
|
checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libnghttp2-sys"
|
name = "libnghttp2-sys"
|
||||||
|
@ -1204,6 +1249,28 @@ dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mio"
|
||||||
|
version = "0.7.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"log 0.4.11",
|
||||||
|
"miow",
|
||||||
|
"ntapi",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "miow"
|
||||||
|
version = "0.3.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21"
|
||||||
|
dependencies = [
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mustache"
|
name = "mustache"
|
||||||
version = "0.9.0"
|
version = "0.9.0"
|
||||||
|
@ -1236,6 +1303,15 @@ version = "0.1.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
|
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ntapi"
|
||||||
|
version = "0.3.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44"
|
||||||
|
dependencies = [
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num-integer"
|
name = "num-integer"
|
||||||
version = "0.1.44"
|
version = "0.1.44"
|
||||||
|
@ -1314,6 +1390,7 @@ version = "0.3.0-alpha1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-std",
|
"async-std",
|
||||||
"clap",
|
"clap",
|
||||||
|
"comfy-table",
|
||||||
"epub-builder",
|
"epub-builder",
|
||||||
"futures",
|
"futures",
|
||||||
"html5ever",
|
"html5ever",
|
||||||
|
@ -1333,6 +1410,31 @@ version = "2.0.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72"
|
checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parking_lot"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb"
|
||||||
|
dependencies = [
|
||||||
|
"instant",
|
||||||
|
"lock_api",
|
||||||
|
"parking_lot_core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parking_lot_core"
|
||||||
|
version = "0.8.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if 1.0.0",
|
||||||
|
"instant",
|
||||||
|
"libc",
|
||||||
|
"redox_syscall 0.2.6",
|
||||||
|
"smallvec",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "percent-encoding"
|
name = "percent-encoding"
|
||||||
version = "2.1.0"
|
version = "2.1.0"
|
||||||
|
@ -1596,6 +1698,15 @@ version = "0.1.57"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
|
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "redox_syscall"
|
||||||
|
version = "0.2.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8270314b5ccceb518e7e578952f0b72b88222d02e8f77f5ecf7abbb673539041"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex"
|
name = "regex"
|
||||||
version = "1.4.2"
|
version = "1.4.2"
|
||||||
|
@ -1779,6 +1890,26 @@ dependencies = [
|
||||||
"opaque-debug",
|
"opaque-debug",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "signal-hook"
|
||||||
|
version = "0.1.17"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7e31d442c16f047a671b5a71e2161d6e68814012b7f5379d269ebd915fac2729"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"mio",
|
||||||
|
"signal-hook-registry",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "signal-hook-registry"
|
||||||
|
version = "1.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "siphasher"
|
name = "siphasher"
|
||||||
version = "0.3.3"
|
version = "0.3.3"
|
||||||
|
@ -1804,9 +1935,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "smallvec"
|
name = "smallvec"
|
||||||
version = "1.5.0"
|
version = "1.6.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7acad6f34eb9e8a259d3283d1e8c1d34d7415943d4895f65cc73813c7396fc85"
|
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "socket2"
|
name = "socket2"
|
||||||
|
@ -1816,7 +1947,7 @@ checksum = "2c29947abdee2a218277abeca306f25789c938e500ea5a9d4b12a5a504466902"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if 1.0.0",
|
"cfg-if 1.0.0",
|
||||||
"libc",
|
"libc",
|
||||||
"redox_syscall",
|
"redox_syscall 0.1.57",
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -1924,6 +2055,24 @@ version = "0.8.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strum"
|
||||||
|
version = "0.20.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7318c509b5ba57f18533982607f24070a55d353e90d4cae30c467cdb2ad5ac5c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strum_macros"
|
||||||
|
version = "0.20.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ee8bc6b87a5112aeeab1f4a9f7ab634fe6cbefc4850006df31267f4cfb9e3149"
|
||||||
|
dependencies = [
|
||||||
|
"heck",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "subtle"
|
name = "subtle"
|
||||||
version = "2.3.0"
|
version = "2.3.0"
|
||||||
|
@ -2178,6 +2327,12 @@ dependencies = [
|
||||||
"tinyvec",
|
"tinyvec",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-segmentation"
|
||||||
|
version = "1.7.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-width"
|
name = "unicode-width"
|
||||||
version = "0.1.8"
|
version = "0.1.8"
|
||||||
|
|
|
@ -14,6 +14,7 @@ readme = "README.md"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
async-std = "1.7.0"
|
async-std = "1.7.0"
|
||||||
clap = "2.33.3"
|
clap = "2.33.3"
|
||||||
|
comfy-table = "2.1.0"
|
||||||
epub-builder = "0.4.8"
|
epub-builder = "0.4.8"
|
||||||
futures = "0.3.12"
|
futures = "0.3.12"
|
||||||
html5ever = "0.25.1"
|
html5ever = "0.25.1"
|
||||||
|
|
33
src/epub.rs
33
src/epub.rs
|
@ -1,5 +1,7 @@
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
|
||||||
|
use comfy_table::presets::{UTF8_FULL, UTF8_HORIZONTAL_BORDERS_ONLY};
|
||||||
|
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
|
||||||
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
|
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
|
||||||
use indicatif::{ProgressBar, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressStyle};
|
||||||
|
|
||||||
|
@ -18,8 +20,17 @@ pub fn generate_epubs(
|
||||||
);
|
);
|
||||||
bar.set_style(style);
|
bar.set_style(style);
|
||||||
bar.set_message("Generating epubs");
|
bar.set_message("Generating epubs");
|
||||||
|
let mut base_table = Table::new();
|
||||||
|
base_table
|
||||||
|
.load_preset(UTF8_FULL)
|
||||||
|
.load_preset(UTF8_HORIZONTAL_BORDERS_ONLY)
|
||||||
|
.set_content_arrangement(ContentArrangement::Dynamic);
|
||||||
match merged {
|
match merged {
|
||||||
Some(name) => {
|
Some(name) => {
|
||||||
|
base_table.set_header(vec![Cell::new("Table of Contents")
|
||||||
|
.add_attribute(Attribute::Bold)
|
||||||
|
.set_alignment(CellAlignment::Center)
|
||||||
|
.fg(Color::Green)]);
|
||||||
let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;
|
let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;
|
||||||
epub.inline_toc();
|
epub.inline_toc();
|
||||||
epub = articles
|
epub = articles
|
||||||
|
@ -41,7 +52,6 @@ pub fn generate_epubs(
|
||||||
|
|
||||||
article.img_urls.iter().for_each(|img| {
|
article.img_urls.iter().for_each(|img| {
|
||||||
// TODO: Add error handling
|
// TODO: Add error handling
|
||||||
bar.inc(1);
|
|
||||||
let mut file_path = std::env::temp_dir();
|
let mut file_path = std::env::temp_dir();
|
||||||
file_path.push(&img.0);
|
file_path.push(&img.0);
|
||||||
|
|
||||||
|
@ -53,6 +63,8 @@ pub fn generate_epubs(
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
});
|
});
|
||||||
|
bar.inc(1);
|
||||||
|
base_table.add_row(vec![article.metadata().title()]);
|
||||||
epub
|
epub
|
||||||
});
|
});
|
||||||
let mut out_file = File::create(&name).unwrap();
|
let mut out_file = File::create(&name).unwrap();
|
||||||
|
@ -61,6 +73,13 @@ pub fn generate_epubs(
|
||||||
println!("Created {:?}", name);
|
println!("Created {:?}", name);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
base_table
|
||||||
|
.set_header(vec![Cell::new("Downloaded articles")
|
||||||
|
.add_attribute(Attribute::Bold)
|
||||||
|
.set_alignment(CellAlignment::Center)
|
||||||
|
.fg(Color::Green)])
|
||||||
|
.set_content_arrangement(ContentArrangement::Dynamic);
|
||||||
|
|
||||||
for article in articles {
|
for article in articles {
|
||||||
let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;
|
let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;
|
||||||
let file_name = format!(
|
let file_name = format!(
|
||||||
|
@ -81,20 +100,28 @@ pub fn generate_epubs(
|
||||||
}
|
}
|
||||||
epub.metadata("title", replace_metadata_value(article.metadata().title()))?;
|
epub.metadata("title", replace_metadata_value(article.metadata().title()))?;
|
||||||
epub.add_content(EpubContent::new("index.xhtml", html_str.as_bytes()))?;
|
epub.add_content(EpubContent::new("index.xhtml", html_str.as_bytes()))?;
|
||||||
for img in article.img_urls {
|
for img in &article.img_urls {
|
||||||
let mut file_path = std::env::temp_dir();
|
let mut file_path = std::env::temp_dir();
|
||||||
file_path.push(&img.0);
|
file_path.push(&img.0);
|
||||||
|
|
||||||
let img_buf = File::open(&file_path).expect("Can't read file");
|
let img_buf = File::open(&file_path).expect("Can't read file");
|
||||||
epub.add_resource(file_path.file_name().unwrap(), img_buf, img.1.unwrap())?;
|
epub.add_resource(
|
||||||
|
file_path.file_name().unwrap(),
|
||||||
|
img_buf,
|
||||||
|
img.1.as_ref().unwrap(),
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
epub.generate(&mut out_file)?;
|
epub.generate(&mut out_file)?;
|
||||||
bar.inc(1);
|
bar.inc(1);
|
||||||
|
|
||||||
|
base_table.add_row(vec![article.metadata().title()]);
|
||||||
|
|
||||||
// println!("Created {:?}", file_name);
|
// println!("Created {:?}", file_name);
|
||||||
}
|
}
|
||||||
bar.finish_with_message("Generated epubs\n");
|
bar.finish_with_message("Generated epubs\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
println!("{}", base_table);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1585,7 +1585,8 @@ impl Readability {
|
||||||
/// Using a variety of metrics (content score, classname, element types), find the content that is most likely to be the stuff
|
/// Using a variety of metrics (content score, classname, element types), find the content that is most likely to be the stuff
|
||||||
/// a user wants to read. Then return it wrapped up in a div.
|
/// a user wants to read. Then return it wrapped up in a div.
|
||||||
fn grab_article(&mut self) {
|
fn grab_article(&mut self) {
|
||||||
println!("Grabbing article");
|
// TODO: Add logging for this
|
||||||
|
// println!("Grabbing article");
|
||||||
// var doc = this._doc;
|
// var doc = this._doc;
|
||||||
// var isPaging = (page !== null ? true: false);
|
// var isPaging = (page !== null ? true: false);
|
||||||
// page = page ? page : this._doc.body;
|
// page = page ? page : this._doc.body;
|
||||||
|
|
Reference in a new issue