commit 6b1a826ccc
14 changed files with 1106 additions and 524 deletions

.gitignore (vendored): 1 change
@@ -1,3 +1,4 @@
 /target
 *.epub
 *.log
+.vscode/

Cargo.lock (generated): 227 changes

Cargo.lock is auto-generated ("This file is automatically @generated by Cargo. It is not intended for manual editing."). A `version = 3` line is added after the header comments, and every version bump below carries the matching checksum update.

Version bumps:
- aho-corasick 0.7.15 → 0.7.18
- comfy-table 2.1.0 → 3.0.0
- crossterm 0.19.0 → 0.20.0 (drops lazy_static, adds signal-hook-mio)
- crossterm_winapi 0.7.0 → 0.8.0
- flexi_logger 0.17.1 → 0.18.0
- futures, futures-channel, futures-core, futures-executor, futures-io, futures-macro, futures-sink, futures-task, futures-util 0.3.14 → 0.3.15 (futures-macro and futures-util gain an autocfg dependency)
- indicatif 0.15.0 → 0.16.2
- memchr 2.3.4 → 2.4.0
- number_prefix 0.3.0 → 0.4.0
- regex 1.4.6 → 1.5.4
- regex-syntax 0.6.23 → 0.6.25
- signal-hook 0.1.17 → 0.3.9 (drops its mio dependency)
- signal-hook-registry 1.3.0 → 1.4.0
- strum 0.20.0 → 0.21.0 and strum_macros 0.20.1 → 0.21.1
- thiserror and thiserror-impl 1.0.24 → 1.0.25
- url 2.2.1 → 2.2.2
- paperoni 0.4.1-alpha1 → 0.5.0-alpha1 (its dependency list gains derive_builder and itertools)

New packages:
- darling 0.12.4 (with darling_core and darling_macro)
- derive_builder 0.10.2 (with derive_builder_core and derive_builder_macro)
- either 1.6.1
- ident_case 1.0.1
- itertools 0.10.1
- signal-hook-mio 0.2.1
- strsim 0.10.0 (a second strsim alongside 0.8.0, so clap's dependency entry is now written as "strsim 0.8.0")

Other dependency-list changes:
- The entry whose dependencies include encode_unicode, lazy_static, libc, terminal_size and winapi (most likely the console crate) drops regex and unicode-width.
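
The bumps above are the kind produced by a routine lockfile refresh rather than hand edits; a typical way to produce them (standard Cargo commands, not taken from this commit) is:

```sh
# Refresh a single crate in Cargo.lock to the newest version allowed by Cargo.toml
cargo update -p regex

# Or refresh every dependency at once
cargo update
```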

Cargo.toml: 20 changes
@@ -3,7 +3,7 @@ description = "A web article downloader"
 homepage = "https://github.com/hipstermojo/paperoni"
 repository = "https://github.com/hipstermojo/paperoni"
 name = "paperoni"
-version = "0.4.1-alpha1"
+version = "0.5.0-alpha1"
 authors = ["Kenneth Gitere <gitere81@gmail.com>"]
 edition = "2018"
 license = "MIT"
@@ -12,23 +12,25 @@ readme = "README.md"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-async-std = "1.9.0"
 # atty = "0.2.14"
+async-std = "1.9.0"
 chrono = "0.4.19"
 clap = "2.33.3"
 colored = "2.0.0"
-comfy-table = "2.1.0"
+comfy-table = "3.0.0"
+derive_builder = "0.10.2"
 directories = "3.0.2"
 epub-builder = "0.4.8"
-flexi_logger = "0.17.1"
-futures = "0.3.14"
+flexi_logger = "0.18.0"
+futures = "0.3.15"
 html5ever = "0.25.1"
-indicatif = "0.15.0"
+indicatif = "0.16.2"
+itertools = "0.10.1"
 kuchiki = "0.8.1"
 lazy_static = "1.4.0"
 log = "0.4.14"
 md5 = "0.7.0"
-regex = "1.4.5"
+regex = "1.5.4"
 surf = "2.2.0"
-thiserror = "1.0.24"
-url = "2.2.1"
+thiserror = "1.0.25"
+url = "2.2.2"

README.md: 61 changes
@@ -8,7 +8,7 @@
 </a>
 </div>
 
-Paperoni is a CLI tool made in Rust for downloading web articles as EPUBs.
+Paperoni is a CLI tool made in Rust for downloading web articles as EPUBs. There is provisional<sup><a href="#pdf-exports">\*</a></sup> support for exporting to PDF as well.
 
 > This project is in an alpha release so it might crash when you use it. Please open an [issue on Github](https://github.com/hipstermojo/paperoni/issues/new) if it does crash.
 
@@ -23,7 +23,7 @@ Check the [releases](https://github.com/hipstermojo/paperoni/releases) page for
 Paperoni is published on [crates.io](https://crates.io). If you have [cargo](https://github.com/rust-lang/cargo) installed, then run:
 
 ```sh
-cargo install paperoni --version 0.4.1-alpha1
+cargo install paperoni --version 0.5.0-alpha1
 ```
 
 _Paperoni is still in alpha so the `version` flag has to be passed._
@@ -48,18 +48,44 @@ USAGE:
     paperoni [OPTIONS] [urls]...
 
 OPTIONS:
-    -f, --file <file>          Input file containing links
-    -h, --help                 Prints help information
-        --log-to-file          Enables logging of events to a file located in .paperoni/logs with a default log level
-                               of debug. Use -v to specify the logging level
-        --max_conn <max_conn>  The maximum number of concurrent HTTP connections when downloading articles. Default is
-                               8
-        --merge <output_name>  Merge multiple articles into a single epub
-    -V, --version              Prints version information
-    -v                         Enables logging of events and set the verbosity level. Use -h to read on its usage
+    -f, --file <file>
+            Input file containing links
+
+    -h, --help
+            Prints help information
+
+        --inline-toc
+            Add an inlined Table of Contents page at the start of the merged article.
+
+        --log-to-file
+            Enables logging of events to a file located in .paperoni/logs with a default log level of debug. Use -v to
+            specify the logging level
+
+        --max-conn <max_conn>
+            The maximum number of concurrent HTTP connections when downloading articles. Default is 8.
+            NOTE: It is advised to use as few connections as needed i.e between 1 and 50. Using more connections can
+            end up overloading your network card with too many concurrent requests.
+
+    -o, --output-dir <output_directory>
+            Directory for saving epub documents
+
+        --merge <output_name>
+            Merge multiple articles into a single epub that will be given the name provided
+
+    -V, --version
+            Prints version information
+
+    -v
+            This takes up to 4 levels of verbosity in the following order.
+            - Error (-v)
+            - Warn (-vv)
+            - Info (-vvv)
+            - Debug (-vvvv)
+            When this flag is passed, it disables the progress bars and logs to stderr.
+            If you would like to send the logs to a file (and enable progress bars), pass the log-to-file flag.
 
 ARGS:
-    <urls>...    Urls of web articles
+    <urls>...
+            Urls of web articles
+
 ```
 
 To download a single article pass in its URL
@@ -124,3 +150,14 @@ This program is still in alpha so a number of things won't work:
 - Code snippets on Medium articles that are lazy loaded will not appear in the EPUB.
 
 There are also web pages it won't work on in general such as Twitter and Reddit threads.
+
+## PDF exports
+
+As of version 0.5-alpha1, you can now export to PDF using a third-party tool. This requires that you install [Calibre](https://calibre-ebook.com/), which comes with an ebook conversion tool. You can convert the epub to a pdf through the terminal with `ebook-convert`:
+
+```sh
+# Assuming the downloaded epub was called foo.epub
+ebook-convert foo.epub foo.pdf
+```
+
+Alternatively, you can use the Calibre GUI to do the file conversion.
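
The reworked help text above introduces `--inline-toc` and `-o`/`--output-dir`. As a quick illustration of those flags (the URLs and output names below are placeholders, not taken from the README), an invocation might look like:

```sh
# Merge two articles into one EPUB with an inlined Table of Contents
# (--inline-toc only applies together with --merge)
paperoni --merge weekly_reading --inline-toc \
    https://example.com/article-1 \
    https://example.com/article-2

# Save a single article's EPUB into a chosen directory
paperoni -o ~/Documents/articles https://example.com/article-3
```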

rust-toolchain (new file): 1 change
@@ -0,0 +1 @@
+1.52.1

src/assets/writ.min.css (vendored, new file): 7 changes
@@ -0,0 +1,7 @@
+/*!
+ * Writ v1.0.4
+ *
+ * Copyright © 2015, Curtis McEnroe <curtis@cmcenroe.me>
+ *
+ * https://cmcenroe.me/writ/LICENSE (ISC)
+ */dd,hr,ol ol,ol ul,ul ol,ul ul{margin:0}pre,table{overflow-x:auto}a,ins{text-decoration:none}html{font-family:Georgia,Lucida Bright,Book Antiqua,serif;font-size:16px;line-height:1.5rem}code,kbd,pre,samp{font-family:Fira Code,Liberation Mono,Menlo,Courier,monospace;font-size:.833rem;color:#111}kbd{font-weight:700}h1,h2,h3,h4,h5,h6,th{font-weight:400}h1{font-size:2.488em}h2{font-size:2.074em}h3{font-size:1.728em}h4{font-size:1.44em}h5{font-size:1.2em}h6{font-size:1em}small{font-size:.833em}h1,h2,h3{line-height:3rem}blockquote,dl,h1,h2,h3,h4,h5,h6,ol,p,pre,table,ul{margin:1.5rem 0 0}pre,table{margin-bottom:-1px}hr{border:none;padding:1.5rem 0 0}table{line-height:calc(1.5rem - 1px);width:100%;border-collapse:collapse}pre{margin-top:calc(1.5rem - 1px)}body{color:#222;margin:1.5rem 1ch}a,a code,header nav a:visited{color:#00e}a:visited,a:visited code{color:#60b}mark{color:inherit;background-color:#fe0}code,pre,samp,tfoot,thead{background-color:rgba(0,0,0,.05)}blockquote,ins,main aside{border:rgba(0,0,0,.05) solid}blockquote,main aside{border-width:0 0 0 .5ch}code,pre,samp{border:rgba(0,0,0,.1) solid}td,th{border:solid #dbdbdb}body>header{text-align:center}body>footer,main{display:block;max-width:78ch;margin:auto}main aside,main figure{float:right;margin:1.5rem 0 0 1ch}main aside{max-width:26ch;padding:0 0 0 .5ch}blockquote{margin-right:3ch;margin-left:1.5ch;padding:0 0 0 1ch}pre{border-width:1px;border-radius:2px;padding:0 .5ch}pre code{border:none;padding:0;background-color:transparent;white-space:inherit}code,ins,samp,td,th{border-width:1px}img{max-width:100%}dd,ol,ul{padding:0 0 0 3ch}ul>li{list-style-type:disc}li ul>li{list-style-type:circle}li li ul>li{list-style-type:square}ol>li{list-style-type:decimal}li ol>li{list-style-type:lower-roman}li li ol>li{list-style-type:lower-alpha}nav ul{padding:0;list-style-type:none}nav ul li{display:inline;padding-left:1ch;white-space:nowrap}nav ul li:first-child{padding-left:0}ins,mark{padding:1px}td,th{padding:0 .5ch}sub,sup{font-size:.75em;line-height:1em}code,samp{border-radius:2px;padding:.1em .2em;white-space:nowrap}

src/cli.rs: 292 changes
@@ -1,13 +1,32 @@
-use std::{fs::File, io::Read, path::Path};
+use std::{fs, num::NonZeroUsize, path::Path};
 
 use chrono::{DateTime, Local};
-use clap::{App, AppSettings, Arg};
+use clap::{App, AppSettings, Arg, ArgMatches};
 use flexi_logger::LevelFilter as LogLevel;
+use itertools::Itertools;
 
-use crate::logs::init_logger;
+type Error = crate::errors::CliError<AppConfigBuilderError>;
 
-pub fn cli_init() -> AppConfig {
-    let app = App::new("paperoni")
+const DEFAULT_MAX_CONN: usize = 8;
+
+#[derive(derive_builder::Builder)]
+pub struct AppConfig {
+    /// Urls for store in epub
+    pub urls: Vec<String>,
+    pub max_conn: usize,
+    /// Path to file of multiple articles into a single epub
+    pub merged: Option<String>,
+    pub output_directory: Option<String>,
+    pub log_level: LogLevel,
+    pub can_disable_progress_bar: bool,
+    pub start_time: DateTime<Local>,
+    pub is_logging_to_file: bool,
+    pub inline_toc: bool,
+}
+
+impl AppConfig {
+    pub fn init_with_cli() -> Result<AppConfig, Error> {
+        let app = App::new("paperoni")
         .settings(&[
             AppSettings::ArgRequiredElseHelp,
             AppSettings::UnifiedHelpMessage,
@@ -28,14 +47,23 @@ pub fn cli_init() -> AppConfig {
                 .help("Input file containing links")
                 .takes_value(true),
         )
+        .arg(
+            Arg::with_name("output_directory")
+                .long("output-dir")
+                .short("o")
+                .help("Directory to store output epub documents")
+                .conflicts_with("output_name")
+                .takes_value(true),
+        )
         .arg(
             Arg::with_name("output_name")
                 .long("merge")
                 .help("Merge multiple articles into a single epub")
                 .long_help("Merge multiple articles into a single epub that will be given the name provided")
+                .conflicts_with("output_directory")
                 .takes_value(true),
         ).arg(
-            Arg::with_name("max_conn")
+            Arg::with_name("max-conn")
                 .long("max_conn")
                 .help("The maximum number of concurrent HTTP connections when downloading articles. Default is 8")
                 .long_help("The maximum number of concurrent HTTP connections when downloading articles. Default is 8.\nNOTE: It is advised to use as few connections as needed i.e between 1 and 50. Using more connections can end up overloading your network card with too many concurrent requests.")
@@ -59,144 +87,128 @@ pub fn cli_init() -> AppConfig {
             Arg::with_name("log-to-file")
                 .long("log-to-file")
                 .help("Enables logging of events to a file located in .paperoni/logs with a default log level of debug. Use -v to specify the logging level")
-                .takes_value(false));
-    let arg_matches = app.get_matches();
-
-    let mut urls: Vec<String> = match arg_matches.value_of("file") {
-        Some(file_name) => {
-            if let Ok(mut file) = File::open(file_name) {
-                let mut content = String::new();
-                match file.read_to_string(&mut content) {
-                    Ok(_) => content
-                        .lines()
-                        .filter(|line| !line.is_empty())
-                        .map(|line| line.to_owned())
-                        .collect(),
-                    Err(_) => vec![],
-                }
-            } else {
-                println!("Unable to open file: {}", file_name);
-                vec![]
-            }
-        }
-        None => vec![],
-    };
-
-    if let Some(vals) = arg_matches.values_of("urls") {
-        urls.extend(
-            vals.filter(|val| !val.is_empty())
-                .map(|val| val.to_string()),
-        );
-    }
-
-    let max_conn = arg_matches
-        .value_of("max_conn")
-        .map(|conn_str| conn_str.parse::<usize>().ok())
-        .flatten()
-        .map(|max| if max > 0 { max } else { 1 })
-        .unwrap_or(8);
-
-    let mut app_config = AppConfig::new(max_conn);
-    app_config.set_urls(urls);
-
-    if let Some(name) = arg_matches.value_of("output_name") {
-        let file_path = Path::new(name);
-        if file_path.is_dir() {
-            eprintln!("{:?} is a directory", name);
-            std::process::exit(1);
-        }
-
-        let file_name = if file_path.extension().is_some() {
-            name.to_owned()
-        } else {
-            name.to_owned() + ".epub"
-        };
-
-        match std::fs::File::create(&file_name) {
-            Ok(_) => (),
-            Err(e) => {
-                eprintln!("Unable to create file {:?}\n{}", file_path, e);
-                std::process::exit(1)
-            }
-        }
-        app_config.merged = Some(file_name);
-    }
-
-    if arg_matches.is_present("verbosity") {
-        if !arg_matches.is_present("log-to-file") {
-            app_config.can_disable_progress_bar = true;
-        }
-        let log_levels: [LogLevel; 5] = [
-            LogLevel::Off,
-            LogLevel::Error,
-            LogLevel::Warn,
-            LogLevel::Info,
-            LogLevel::Debug,
-        ];
-        let level = arg_matches.occurrences_of("verbosity").clamp(0, 4) as usize;
-        app_config.log_level = log_levels[level];
-    }
-    if arg_matches.is_present("log-to-file") {
-        app_config.log_level = LogLevel::Debug;
-        app_config.is_logging_to_file = true;
-    }
-
-    init_logger(&app_config);
-
-    app_config
-}
-
-pub struct AppConfig {
-    urls: Vec<String>,
-    max_conn: usize,
-    merged: Option<String>,
-    log_level: LogLevel,
-    can_disable_progress_bar: bool,
-    start_time: DateTime<Local>,
-    is_logging_to_file: bool,
-}
-
-impl AppConfig {
-    fn new(max_conn: usize) -> Self {
-        Self {
-            urls: vec![],
-            max_conn,
-            merged: None,
-            log_level: LogLevel::Off,
-            can_disable_progress_bar: false,
-            start_time: Local::now(),
-            is_logging_to_file: false,
-        }
-    }
-
-    fn set_urls(&mut self, urls: Vec<String>) {
-        self.urls.extend(urls);
-    }
-
-    pub fn urls(&self) -> &Vec<String> {
-        &self.urls
-    }
-    pub fn max_conn(&self) -> usize {
-        self.max_conn
-    }
-
-    pub fn merged(&self) -> Option<&String> {
-        self.merged.as_ref()
-    }
-
-    pub fn log_level(&self) -> LogLevel {
-        self.log_level
-    }
-
-    pub fn can_disable_progress_bar(&self) -> bool {
-        self.can_disable_progress_bar
-    }
-
-    pub fn start_time(&self) -> &DateTime<Local> {
-        &self.start_time
-    }
-
-    pub fn is_logging_to_file(&self) -> bool {
-        self.is_logging_to_file
+                .takes_value(false))
+        .arg(
+            Arg::with_name("inline-toc")
+                .long("inline-toc")
+                .requires("output_name")
+                .help("Add an inlined Table of Contents page at the start of the merged article.")
+                .long_help("Add an inlined Table of Contents page at the start of the merged article. This does not affect the Table of Contents navigation")
+        );
+
+        Self::try_from(app.get_matches())
+    }
+
+    fn init_merge_file(self) -> Result<Self, Error> {
+        self.merged
+            .as_deref()
+            .map(fs::File::create)
+            .transpose()
+            .err()
+            .map(|err| Err(Error::InvalidOutputPath(err.to_string())))
+            .unwrap_or(Ok(self))
+    }
+
+    fn init_logger(self) -> Result<Self, Error> {
+        use crate::logs;
+        logs::init_logger(self.log_level, &self.start_time, self.is_logging_to_file)
+            .map(|_| self)
+            .map_err(Error::LogError)
+    }
+}
+
+use std::convert::TryFrom;
+
+impl<'a> TryFrom<ArgMatches<'a>> for AppConfig {
+    type Error = Error;
+
+    fn try_from(arg_matches: ArgMatches<'a>) -> Result<Self, Self::Error> {
+        AppConfigBuilder::default()
+            .urls({
+                let url_filter = |url: &str| {
+                    let url = url.trim();
+                    if !url.is_empty() {
+                        Some(url.to_owned())
+                    } else {
+                        None
+                    }
+                };
+                let direct_urls = arg_matches
+                    .values_of("urls")
+                    .and_then(|urls| urls.map(url_filter).collect::<Option<Vec<_>>>())
+                    .unwrap_or(Vec::new());
+                let file_urls = arg_matches
+                    .value_of("file")
+                    .map(fs::read_to_string)
+                    .transpose()?
+                    .and_then(|content| content.lines().map(url_filter).collect::<Option<Vec<_>>>())
+                    .unwrap_or(Vec::new());
+
+                let urls = [direct_urls, file_urls]
+                    .concat()
+                    .into_iter()
+                    .unique()
+                    .collect_vec();
+                if !urls.is_empty() {
+                    Ok(urls)
+                } else {
+                    Err(Error::NoUrls)
+                }
+            }?)
+            .max_conn(match arg_matches.value_of("max-conn") {
+                Some(max_conn) => max_conn.parse::<NonZeroUsize>()?.get(),
+                None => DEFAULT_MAX_CONN,
+            })
+            .merged(arg_matches.value_of("output_name").map(|name| {
+                if name.ends_with(".epub") {
+                    name.to_owned()
+                } else {
+                    name.to_string() + ".epub"
+                }
+            }))
+            .can_disable_progress_bar(
+                arg_matches.is_present("verbosity") && !arg_matches.is_present("log-to-file"),
+            )
+            .log_level(match arg_matches.occurrences_of("verbosity") {
+                0 => {
+                    if !arg_matches.is_present("log-to-file") {
+                        LogLevel::Off
+                    } else {
+                        LogLevel::Debug
+                    }
+                }
+                1 => LogLevel::Error,
+                2 => LogLevel::Warn,
+                3 => LogLevel::Info,
+                4..=u64::MAX => LogLevel::Debug,
+            })
+            .is_logging_to_file(arg_matches.is_present("log-to-file"))
+            .inline_toc(arg_matches.is_present("inline-toc"))
+            .output_directory(
+                arg_matches
+                    .value_of("output_directory")
+                    .map(|output_directory| {
+                        let path = Path::new(output_directory);
+                        if !path.exists() {
+                            Err(Error::OutputDirectoryNotExists)
+                        } else if !path.is_dir() {
+                            Err(Error::WrongOutputDirectory)
+                        } else {
+                            Ok(output_directory.to_owned())
+                        }
+                    })
+                    .transpose()?,
+            )
+            .start_time(Local::now())
+            .try_init()
+    }
+}
+
+impl AppConfigBuilder {
+    pub fn try_init(&self) -> Result<AppConfig, Error> {
+        self.build()
+            .map_err(Error::AppBuildError)?
+            .init_logger()?
+            .init_merge_file()
     }
 }
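
The new `AppConfig` above derives its builder from the `derive_builder` crate added in this commit, so `AppConfigBuilder` and `AppConfigBuilderError` are generated code. A minimal, self-contained sketch of that pattern, assuming derive_builder 0.10 and using illustrative names only (this is not Paperoni's actual config), looks like:

```rust
use derive_builder::Builder;

// derive_builder generates `ConfigBuilder` with one setter per field and a
// `build()` method that fails with `ConfigBuilderError` if a required field
// was never set.
#[derive(Builder, Debug)]
pub struct Config {
    urls: Vec<String>,
    max_conn: usize,
    // Fields marked `default` may be omitted when building.
    #[builder(default)]
    merged: Option<String>,
}

fn main() {
    let config = ConfigBuilder::default()
        .urls(vec!["https://example.com/article".to_string()])
        .max_conn(8)
        .build(); // `merged` falls back to its default (None)

    match config {
        Ok(config) => println!("{} url(s), {} connections", config.urls.len(), config.max_conn),
        Err(e) => eprintln!("incomplete config: {}", e),
    }
}
```

This mirrors how `try_from` above chains `.urls(..)`, `.max_conn(..)` and the other setters before `try_init` calls the generated `build()`.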
440
src/epub.rs
440
src/epub.rs
|
@ -1,28 +1,35 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
|
||||||
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
|
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
|
||||||
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
|
use epub_builder::{EpubBuilder, EpubContent, TocElement, ZipLibrary};
|
||||||
|
use html5ever::tendril::fmt::Slice;
|
||||||
use indicatif::{ProgressBar, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressStyle};
|
||||||
use log::{debug, info};
|
use kuchiki::NodeRef;
|
||||||
|
use log::{debug, error, info};
|
||||||
|
|
||||||
use crate::{
|
use crate::{cli::AppConfig, errors::PaperoniError, extractor::Extractor};
|
||||||
cli::AppConfig,
|
|
||||||
errors::PaperoniError,
|
lazy_static! {
|
||||||
extractor::{self, Extractor},
|
static ref ESC_SEQ_REGEX: regex::Regex = regex::Regex::new(r#"(&|<|>|'|")"#).unwrap();
|
||||||
};
|
}
|
||||||
|
|
||||||
pub fn generate_epubs(
|
pub fn generate_epubs(
|
||||||
articles: Vec<Extractor>,
|
articles: Vec<Extractor>,
|
||||||
app_config: &AppConfig,
|
app_config: &AppConfig,
|
||||||
successful_articles_table: &mut Table,
|
successful_articles_table: &mut Table,
|
||||||
) -> Result<(), Vec<PaperoniError>> {
|
) -> Result<(), Vec<PaperoniError>> {
|
||||||
let bar = if app_config.can_disable_progress_bar() {
|
if articles.is_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let bar = if app_config.can_disable_progress_bar {
|
||||||
ProgressBar::hidden()
|
ProgressBar::hidden()
|
||||||
} else {
|
} else {
|
||||||
let enabled_bar = ProgressBar::new(articles.len() as u64);
|
let enabled_bar = ProgressBar::new(articles.len() as u64);
|
||||||
let style = ProgressStyle::default_bar().template(
|
let style = ProgressStyle::default_bar().template(
|
||||||
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} epub {pos}/{len:7} {msg:.green}",
|
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} epub {pos}/{len:7} {msg:.green}",
|
||||||
);
|
);
|
||||||
enabled_bar.set_style(style);
|
enabled_bar.set_style(style);
|
||||||
if !articles.is_empty() {
|
if !articles.is_empty() {
|
||||||
enabled_bar.set_message("Generating epubs");
|
enabled_bar.set_message("Generating epubs");
|
||||||
|
@ -30,10 +37,12 @@ pub fn generate_epubs(
|
||||||
enabled_bar
|
enabled_bar
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let stylesheet = include_bytes!("./assets/writ.min.css");
|
||||||
|
|
||||||
let mut errors: Vec<PaperoniError> = Vec::new();
|
let mut errors: Vec<PaperoniError> = Vec::new();
|
||||||
|
|
||||||
match app_config.merged() {
|
match app_config.merged {
|
||||||
Some(name) => {
|
Some(ref name) => {
|
||||||
successful_articles_table.set_header(vec![Cell::new("Table of Contents")
|
successful_articles_table.set_header(vec![Cell::new("Table of Contents")
|
||||||
.add_attribute(Attribute::Bold)
|
.add_attribute(Attribute::Bold)
|
||||||
.set_alignment(CellAlignment::Center)
|
.set_alignment(CellAlignment::Center)
|
||||||
|
@ -57,21 +66,43 @@ pub fn generate_epubs(
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
debug!("Creating {:?}", name);
|
debug!("Creating {:?}", name);
|
||||||
epub.inline_toc();
|
|
||||||
|
if app_config.inline_toc {
|
||||||
|
epub.inline_toc();
|
||||||
|
}
|
||||||
|
|
||||||
|
match epub.stylesheet(stylesheet.as_bytes()) {
|
||||||
|
Ok(_) => (),
|
||||||
|
Err(e) => {
|
||||||
|
error!("Unable to add stylesheets to epub file");
|
||||||
|
let mut paperoni_err: PaperoniError = e.into();
|
||||||
|
paperoni_err.set_article_source(name);
|
||||||
|
errors.push(paperoni_err);
|
||||||
|
return Err(errors);
|
||||||
|
}
|
||||||
|
}
|
||||||
articles
|
articles
|
||||||
.iter()
|
.iter()
|
||||||
.enumerate()
|
.enumerate()
|
||||||
.fold(&mut epub, |epub, (idx, article)| {
|
.fold(&mut epub, |epub, (idx, article)| {
|
||||||
let mut article_result = || -> Result<(), PaperoniError> {
|
let mut article_result = || -> Result<(), PaperoniError> {
|
||||||
let mut html_buf = Vec::new();
|
let content_url = format!("article_{}.xhtml", idx);
|
||||||
extractor::serialize_to_xhtml(article.article(), &mut html_buf)?;
|
let mut xhtml_buf = Vec::new();
|
||||||
let html_str = std::str::from_utf8(&html_buf)?;
|
let header_level_tocs =
|
||||||
epub.metadata("title", replace_metadata_value(name))?;
|
get_header_level_toc_vec(&content_url, article.article());
|
||||||
|
|
||||||
|
serialize_to_xhtml(article.article(), &mut xhtml_buf)?;
|
||||||
|
let xhtml_str = std::str::from_utf8(&xhtml_buf)?;
|
||||||
let section_name = article.metadata().title();
|
let section_name = article.metadata().title();
|
||||||
epub.add_content(
|
let mut content = EpubContent::new(&content_url, xhtml_str.as_bytes())
|
||||||
EpubContent::new(format!("article_{}.xhtml", idx), html_str.as_bytes())
|
.title(replace_escaped_characters(section_name));
|
||||||
.title(replace_metadata_value(section_name)),
|
|
||||||
)?;
|
for toc_element in header_level_tocs {
|
||||||
|
content = content.child(toc_element);
|
||||||
|
}
|
||||||
|
|
||||||
|
epub.metadata("title", replace_escaped_characters(name))?;
|
||||||
|
epub.add_content(content)?;
|
||||||
info!("Adding images for {:?}", name);
|
info!("Adding images for {:?}", name);
|
||||||
article.img_urls.iter().for_each(|img| {
|
article.img_urls.iter().for_each(|img| {
|
||||||
// TODO: Add error handling and return errors as a vec
|
// TODO: Add error handling and return errors as a vec
|
||||||
|
@ -100,10 +131,10 @@ pub fn generate_epubs(
|
||||||
let appendix = generate_appendix(articles.iter().collect());
|
let appendix = generate_appendix(articles.iter().collect());
|
||||||
if let Err(err) = epub.add_content(
|
if let Err(err) = epub.add_content(
|
||||||
EpubContent::new("appendix.xhtml", appendix.as_bytes())
|
EpubContent::new("appendix.xhtml", appendix.as_bytes())
|
||||||
.title(replace_metadata_value("Article Sources")),
|
.title(replace_escaped_characters("Article Sources")),
|
||||||
) {
|
) {
|
||||||
let mut paperoni_err: PaperoniError = err.into();
|
let mut paperoni_err: PaperoniError = err.into();
|
||||||
paperoni_err.set_article_source(name);
|
paperoni_err.set_article_source(&name);
|
||||||
errors.push(paperoni_err);
|
errors.push(paperoni_err);
|
||||||
return Err(errors);
|
return Err(errors);
|
||||||
}
|
}
|
||||||
|
@ -113,7 +144,7 @@ pub fn generate_epubs(
|
||||||
Ok(_) => (),
|
Ok(_) => (),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
let mut paperoni_err: PaperoniError = err.into();
|
let mut paperoni_err: PaperoniError = err.into();
|
||||||
paperoni_err.set_article_source(name);
|
paperoni_err.set_article_source(&name);
|
||||||
errors.push(paperoni_err);
|
errors.push(paperoni_err);
|
||||||
return Err(errors);
|
return Err(errors);
|
||||||
}
|
}
|
||||||
|
@ -135,7 +166,8 @@ pub fn generate_epubs(
|
||||||
let mut result = || -> Result<(), PaperoniError> {
|
let mut result = || -> Result<(), PaperoniError> {
|
||||||
let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;
|
let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;
|
||||||
let file_name = format!(
|
let file_name = format!(
|
||||||
"{}.epub",
|
"{}/{}.epub",
|
||||||
|
app_config.output_directory.as_deref().unwrap_or("."),
|
||||||
article
|
article
|
||||||
.metadata()
|
.metadata()
|
||||||
.title()
|
.title()
|
||||||
|
@ -144,15 +176,31 @@ pub fn generate_epubs(
|
||||||
);
|
);
|
||||||
debug!("Creating {:?}", file_name);
|
debug!("Creating {:?}", file_name);
|
||||||
let mut out_file = File::create(&file_name).unwrap();
|
let mut out_file = File::create(&file_name).unwrap();
|
||||||
let mut html_buf = Vec::new();
|
let mut xhtml_buf = Vec::new();
|
||||||
extractor::serialize_to_xhtml(article.article(), &mut html_buf)
|
let header_level_tocs =
|
||||||
|
get_header_level_toc_vec("index.xhtml", article.article());
|
||||||
|
serialize_to_xhtml(article.article(), &mut xhtml_buf)
|
||||||
.expect("Unable to serialize to xhtml");
|
.expect("Unable to serialize to xhtml");
|
||||||
let html_str = std::str::from_utf8(&html_buf).unwrap();
|
let xhtml_str = std::str::from_utf8(&xhtml_buf).unwrap();
|
||||||
|
|
||||||
if let Some(author) = article.metadata().byline() {
|
if let Some(author) = article.metadata().byline() {
|
||||||
epub.metadata("author", replace_metadata_value(author))?;
|
epub.metadata("author", replace_escaped_characters(author))?;
|
||||||
}
|
}
|
||||||
epub.metadata("title", replace_metadata_value(article.metadata().title()))?;
|
|
||||||
epub.add_content(EpubContent::new("index.xhtml", html_str.as_bytes()))?;
|
epub.stylesheet(stylesheet.as_bytes())?;
|
||||||
|
|
||||||
|
let title = replace_escaped_characters(article.metadata().title());
|
||||||
|
epub.metadata("title", &title)?;
|
||||||
|
|
||||||
|
let mut content =
|
||||||
|
EpubContent::new("index.xhtml", xhtml_str.as_bytes()).title(title);
|
||||||
|
|
||||||
|
for toc_element in header_level_tocs {
|
||||||
|
content = content.child(toc_element);
|
||||||
|
}
|
||||||
|
|
||||||
|
epub.add_content(content)?;
|
||||||
|
|
||||||
for img in &article.img_urls {
|
for img in &article.img_urls {
|
||||||
let mut file_path = std::env::temp_dir();
|
let mut file_path = std::env::temp_dir();
|
||||||
file_path.push(&img.0);
|
file_path.push(&img.0);
|
||||||
|
@ -167,7 +215,7 @@ pub fn generate_epubs(
|
||||||
let appendix = generate_appendix(vec![&article]);
|
let appendix = generate_appendix(vec![&article]);
|
||||||
epub.add_content(
|
epub.add_content(
|
||||||
EpubContent::new("appendix.xhtml", appendix.as_bytes())
|
EpubContent::new("appendix.xhtml", appendix.as_bytes())
|
||||||
.title(replace_metadata_value("Article Source")),
|
.title(replace_escaped_characters("Article Source")),
|
||||||
)?;
|
)?;
|
||||||
epub.generate(&mut out_file)?;
|
epub.generate(&mut out_file)?;
|
||||||
bar.inc(1);
|
bar.inc(1);
|
||||||
|
@ -194,7 +242,7 @@ pub fn generate_epubs(
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Replaces characters that have to be escaped before adding to the epub's metadata
|
/// Replaces characters that have to be escaped before adding to the epub's metadata
|
||||||
fn replace_metadata_value(value: &str) -> String {
|
fn replace_escaped_characters(value: &str) -> String {
|
||||||
value
|
value
|
||||||
.replace("&", "&")
|
.replace("&", "&")
|
||||||
.replace("<", "<")
|
.replace("<", "<")
|
||||||
|
@ -213,14 +261,15 @@ fn generate_appendix(articles: Vec<&Extractor>) -> String {
|
||||||
};
|
};
|
||||||
format!(
|
format!(
|
||||||
"<a href=\"{}\">{}</a><br></br>",
|
"<a href=\"{}\">{}</a><br></br>",
|
||||||
replace_metadata_value(&article.url),
|
replace_escaped_characters(&article.url),
|
||||||
replace_metadata_value(article_name)
|
replace_escaped_characters(article_name)
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
let template = format!(
|
let template = format!(
|
||||||
r#"<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
|
r#"<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
|
||||||
<head>
|
<head>
|
||||||
|
<link rel="stylesheet" href="stylesheet.css" type="text/css"></link>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<h2>Appendix</h2><h3>Article sources</h3>
|
<h2>Appendix</h2><h3>Article sources</h3>
|
||||||
|
@ -232,23 +281,334 @@ fn generate_appendix(articles: Vec<&Extractor>) -> String {
|
||||||
template
|
template
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Adds an id attribute to header elements and assigns a value based on
|
||||||
|
/// the hash of the text content. Headers with id attributes are not modified.
|
||||||
|
/// The headers here are known to have text because the grabbed article from
|
||||||
|
/// readability removes headers with no text.
|
||||||
|
fn generate_header_ids(root_node: &NodeRef) {
|
||||||
|
let headers = root_node
|
||||||
|
.select("h1, h2, h3, h4")
|
||||||
|
.expect("Unable to create selector for headings");
|
||||||
|
let headers_no_id = headers.filter(|node_data_ref| {
|
||||||
|
let attrs = node_data_ref.attributes.borrow();
|
||||||
|
!attrs.contains("id")
|
||||||
|
});
|
||||||
|
for header in headers_no_id {
|
||||||
|
let mut attrs = header.attributes.borrow_mut();
|
||||||
|
let text = header.text_contents();
|
||||||
|
// The value of the id begins with an underscore because the hexadecimal
|
||||||
|
// digest might start with a number which would make it an invalid id
|
||||||
|
// when querying with selectors
|
||||||
|
let value = format!("_{:x}", md5::compute(text));
|
||||||
|
attrs.insert("id", value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
+/// Returns a vector of `TocElement` from a NodeRef used for adding to the Table of Contents for navigation
+fn get_header_level_toc_vec(content_url: &str, article: &NodeRef) -> Vec<TocElement> {
+    // Depth starts from 1
+    const HEADER_LEVEL_MAX_DEPTH: usize = 4;
+    let mut headers_vec: Vec<TocElement> = Vec::new();
+
+    let mut header_levels = HashMap::with_capacity(HEADER_LEVEL_MAX_DEPTH);
+    header_levels.insert("h1", 1);
+    header_levels.insert("h2", 2);
+    header_levels.insert("h3", 3);
+    header_levels.insert("h4", 4);
+
+    generate_header_ids(article);
+
+    let headings = article
+        .select("h1, h2, h3, h4")
+        .expect("Unable to create selector for headings");
+
+    // The header list will be generated using some sort of backtracking algorithm
+    // There will be a stack of maximum size 4 (since it only goes to h4 now)
+    let mut stack: Vec<Option<TocElement>> = std::iter::repeat(None)
+        .take(HEADER_LEVEL_MAX_DEPTH)
+        .collect::<_>();
+
+    for heading in headings {
+        let elem_name: &str = &heading.name.local;
+        let attrs = heading.attributes.borrow();
+        let id = attrs
+            .get("id")
+            .map(ToOwned::to_owned)
+            .expect("Unable to get id value in get_header_level_toc_vec");
+        let url = format!("{}#{}", content_url, id);
+
+        let level = header_levels[elem_name];
+        let index = level - 1;
+
+        if let Some(mut existing_toc) = stack.get_mut(index).take().cloned().flatten() {
+            // If a toc element already exists at that header level, consume all the toc elements
+            // of a lower hierarchy e.g if the existing toc is a h2, then the h3 and h4 in the stack
+            // will be consumed.
+            // We collapse the children by folding from the right to the left of the stack.
+            let descendants_levels = HEADER_LEVEL_MAX_DEPTH - level;
+            let folded_descendants = stack
+                .iter_mut()
+                .rev()
+                .take(descendants_levels)
+                .map(|toc_elem| toc_elem.take())
+                .filter(|toc_elem| toc_elem.is_some())
+                .map(|toc_elem| toc_elem.unwrap())
+                .reduce(|child, parent| parent.child(child));
+
+            if let Some(child) = folded_descendants {
+                existing_toc = existing_toc.child(child);
+            };
+
+            // Find the nearest ancestor to embed into.
+            // If this toc_elem was a h1, then just add it to the headers_vec
+            if index == 0 {
+                headers_vec.push(existing_toc);
+            } else {
+                // Otherwise, find the nearest ancestor to add it to. If none exists, add it to the headers_vec
+                let first_ancestor = stack
+                    .iter_mut()
+                    .take(level - 1)
+                    .map(|toc_elem| toc_elem.as_mut())
+                    .rfind(|toc_elem| toc_elem.is_some())
+                    .flatten();
+
+                match first_ancestor {
+                    Some(ancestor_toc_elem) => {
+                        *ancestor_toc_elem = ancestor_toc_elem.clone().child(existing_toc);
+                    }
+                    None => {
+                        headers_vec.push(existing_toc);
+                    }
+                }
+            }
+        }
+
+        if let Some(toc_elem) = stack.get_mut(index) {
+            *toc_elem = Some(TocElement::new(
+                url,
+                replace_escaped_characters(&heading.text_contents()),
+            ));
+        }
+    }
+
+    let folded_stack = stack
+        .into_iter()
+        .rev()
+        .filter(|toc_elem| toc_elem.is_some())
+        .map(|opt_toc_elem| opt_toc_elem.unwrap())
+        .reduce(|child, parent| parent.child(child));
+    if let Some(toc_elem) = folded_stack {
+        headers_vec.push(toc_elem)
+    }
+
+    headers_vec
+}
+
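To make the stack-folding above concrete, this is the nesting the function produces for the second document used in the tests below; TocElement's public title and children fields are what the assertions read. The helper is a sketch, not part of the commit:

    // <h1>Heading 1</h1> ... <h2>Heading 2</h2> ... <h3>Subheading 3</h3> ... <h1>Second Heading 1</h1>
    //
    //   Heading 1
    //   └─ Heading 2
    //      └─ Subheading 3
    //   Second Heading 1
    fn print_toc(elems: &[TocElement], depth: usize) {
        for elem in elems {
            println!("{}{}", "  ".repeat(depth), elem.title);
            print_toc(&elem.children, depth + 1);
        }
    }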
+/// Serializes a NodeRef to a string that is XHTML compatible
+/// The only DOM nodes serialized are Text and Element nodes
+fn serialize_to_xhtml<W: std::io::Write>(
+    node_ref: &NodeRef,
+    mut w: &mut W,
+) -> Result<(), PaperoniError> {
+    let mut escape_map = HashMap::new();
+    escape_map.insert("<", "&lt;");
+    escape_map.insert(">", "&gt;");
+    escape_map.insert("&", "&amp;");
+    escape_map.insert("\"", "&quot;");
+    escape_map.insert("'", "&#39;");
+    for edge in node_ref.traverse_inclusive() {
+        match edge {
+            kuchiki::iter::NodeEdge::Start(n) => match n.data() {
+                kuchiki::NodeData::Text(rc_text) => {
+                    let text = rc_text.borrow();
+                    let esc_text = ESC_SEQ_REGEX
+                        .replace_all(&text, |captures: &regex::Captures| escape_map[&captures[1]]);
+                    write!(&mut w, "{}", esc_text)?;
+                }
+                kuchiki::NodeData::Element(elem_data) => {
+                    let attrs = elem_data.attributes.borrow();
+                    let attrs_str = attrs
+                        .map
+                        .iter()
+                        .filter(|(k, _)| !k.local.contains("\""))
+                        .map(|(k, v)| {
+                            format!(
+                                "{}=\"{}\"",
+                                k.local,
+                                ESC_SEQ_REGEX
+                                    .replace_all(&v.value, |captures: &regex::Captures| {
+                                        escape_map[&captures[1]]
+                                    })
+                            )
+                        })
+                        .fold("".to_string(), |acc, val| acc + " " + &val);
+                    write!(&mut w, "<{}{}>", &elem_data.name.local, attrs_str)?;
+                }
+                _ => (),
+            },
+            kuchiki::iter::NodeEdge::End(n) => match n.data() {
+                kuchiki::NodeData::Element(elem_data) => {
+                    write!(&mut w, "</{}>", &elem_data.name.local)?;
+                }
+                _ => (),
+            },
+        }
+    }
+    Ok(())
+}
+
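A sketch of the call shape: the buffer name is illustrative, but this is how a chapter body would typically be turned into XHTML bytes with the function above, since Vec&lt;u8&gt; implements std::io::Write:

    fn article_to_xhtml(article: &NodeRef) -> Result<Vec<u8>, PaperoniError> {
        let mut xhtml_buf: Vec<u8> = Vec::new(); // illustrative buffer name
        serialize_to_xhtml(article, &mut xhtml_buf)?;
        Ok(xhtml_buf)
    }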
 #[cfg(test)]
 mod test {
-    use super::replace_metadata_value;
+    use kuchiki::traits::*;
+
+    use super::{generate_header_ids, get_header_level_toc_vec, replace_escaped_characters};
+
     #[test]
-    fn test_replace_metadata_value() {
+    fn test_replace_escaped_characters() {
         let mut value = "Lorem ipsum";
-        assert_eq!(replace_metadata_value(value), "Lorem ipsum");
+        assert_eq!(replace_escaped_characters(value), "Lorem ipsum");
         value = "Memory safe > memory unsafe";
         assert_eq!(
-            replace_metadata_value(value),
+            replace_escaped_characters(value),
             "Memory safe &gt; memory unsafe"
         );
         value = "Author Name <author@mail.example>";
         assert_eq!(
-            replace_metadata_value(value),
+            replace_escaped_characters(value),
             "Author Name &lt;author@mail.example&gt;"
         );
     }
+
+    #[test]
+    fn test_generate_header_ids() {
+        let html_str = r#"
+        <!DOCTYPE html>
+        <html>
+            <body>
+                <h1>Heading 1</h1>
+                <h2 id="heading-2">Heading 2</h2>
+                <h2 id="heading-2-again">Heading 2 again</h2>
+                <h4>Heading 4</h4>
+                <h1>Heading 1 again</h1>
+                <h3 class="heading">Heading 3</h3>
+            </body>
+        </html>
+        "#;
+        let doc = kuchiki::parse_html().one(html_str);
+        generate_header_ids(&doc);
+
+        let mut headers = doc.select("h1, h2, h3, h4").unwrap();
+        let all_headers_have_ids = headers.all(|node_data_ref| {
+            let attrs = node_data_ref.attributes.borrow();
+            if let Some(id) = attrs.get("id") {
+                !id.trim().is_empty()
+            } else {
+                false
+            }
+        });
+        assert_eq!(true, all_headers_have_ids);
+
+        let selector = format!("h1#_{:x}", md5::compute("Heading 1"));
+        assert_eq!(true, doc.select_first(&selector).is_ok());
+
+        let selector = format!("h1#_{:x}", md5::compute("Heading 1 again"));
+        assert_eq!(true, doc.select_first(&selector).is_ok());
+
+        let selector = "h2#heading-2-again";
+        assert_eq!(true, doc.select_first(selector).is_ok());
+    }
+
+    #[test]
+    fn test_get_header_level_toc_vec() {
+        // NOTE: Due to `TocElement` not implementing PartialEq, the tests here
+        // will need to be manually written to cover for this
+        let html_str = r#"
+        <!DOCTYPE html>
+        <html>
+            <body>
+                <p>Lorem ipsum</p>
+            </body>
+        </html>
+        "#;
+        let doc = kuchiki::parse_html().one(html_str);
+
+        let toc_vec = get_header_level_toc_vec("index.xhtml", &doc);
+        assert_eq!(0, toc_vec.len());
+
+        let html_str = r#"
+        <!DOCTYPE html>
+        <html>
+            <body>
+                <h1 id="heading-1">Heading 1</h1>
+                <p>Lorem ipsum</p>
+                <div>
+                    <h2 id="heading-2">Heading 2</h2>
+                    <p>Lorem ipsum</p>
+                    <p>Lorem ipsum</p>
+                </div>
+                <h3 id="subheading-3">Subheading 3</h2>
+                <p>Lorem ipsum</p>
+                <h1 id="heading-2">Second Heading 1</h2>
+                <p>Lorem ipsum</p>
+            </body>
+        </html>
+        "#;
+        let doc = kuchiki::parse_html().one(html_str);
+
+        let toc_vec = get_header_level_toc_vec("index.xhtml", &doc);
+        assert_eq!(2, toc_vec.len());
+
+        let first_h1_toc = toc_vec.first().unwrap();
+        assert_eq!("Heading 1", first_h1_toc.title);
+        assert_eq!(1, first_h1_toc.children.len());
+
+        let h2_toc = first_h1_toc.children.first().unwrap();
+        assert_eq!("Heading 2", h2_toc.title);
+        assert_eq!(1, h2_toc.children.len());
+
+        let h3_toc = h2_toc.children.first().unwrap();
+        assert_eq!("Subheading 3", h3_toc.title);
+        assert_eq!(0, h3_toc.children.len());
+
+        let last_h1_toc = toc_vec.last().unwrap();
+        assert_eq!("Second Heading 1", last_h1_toc.title);
+        assert_eq!(0, last_h1_toc.children.len());
+
+        let html_str = r#"
+        <!DOCTYPE html>
+        <html>
+            <body>
+                <h1 id="heading-1">Heading 1</h1>
+                <p>Lorem ipsum</p>
+                <div>
+                    <h2 id="heading-2">Heading 2</h2>
+                    <p>Lorem ipsum</p>
+                    <p>Lorem ipsum</p>
+                    <h3 id="subheading-3">Subheading 3</h2>
+                    <p>Lorem ipsum</p>
+                </div>
+                <h2 id="heading-2">Heading 2</h2>
+                <p>Lorem ipsum</p>
+                <h4 id="subheading-4">Subheading 4</h4>
+                <h2 id="conclusion">Conclusion</h2>
+            </body>
+        </html>
+        "#;
+        let doc = kuchiki::parse_html().one(html_str);
+
+        let toc_vec = get_header_level_toc_vec("index.xhtml", &doc);
+        assert_eq!(1, toc_vec.len());
+
+        let h1_toc = toc_vec.first().unwrap();
+        assert_eq!("Heading 1", h1_toc.title);
+        assert_eq!(3, h1_toc.children.len());
+
+        let first_h2_toc = h1_toc.children.first().unwrap();
+        assert_eq!("Heading 2", first_h2_toc.title);
+        assert_eq!(1, first_h2_toc.children.len());
+
+        let h3_toc = first_h2_toc.children.first().unwrap();
+        assert_eq!("Subheading 3", h3_toc.title);
+        assert_eq!(0, h3_toc.children.len());
+    }
 }
@@ -1,3 +1,6 @@
+use std::fmt::{Debug, Display};
+
+use flexi_logger::FlexiLoggerError;
 use thiserror::Error;

 #[derive(Error, Debug)]

@@ -124,3 +127,33 @@ impl From<std::str::Utf8Error> for PaperoniError {
         PaperoniError::with_kind(ErrorKind::UTF8Error(err.to_string()))
     }
 }
+
+#[derive(Debug, Error)]
+pub enum LogError {
+    #[error(transparent)]
+    FlexiError(#[from] FlexiLoggerError),
+    #[error("Unable to get user directories for logging purposes")]
+    UserDirectoriesError,
+    #[error("Can't create log directory: {0}")]
+    CreateLogDirectoryError(#[from] std::io::Error),
+}
+
+#[derive(Debug, Error)]
+pub enum CliError<BuilderError: Debug + Display> {
+    #[error("Failed to open file with urls: {0}")]
+    UrlFileError(#[from] std::io::Error),
+    #[error("Failed to parse max connection value: {0}")]
+    InvalidMaxConnectionCount(#[from] std::num::ParseIntError),
+    #[error("No urls were provided")]
+    NoUrls,
+    #[error("Failed to build cli application: {0}")]
+    AppBuildError(BuilderError),
+    #[error("Invalid output path name for merged epubs: {0}")]
+    InvalidOutputPath(String),
+    #[error("Wrong output directory")]
+    WrongOutputDirectory,
+    #[error("Output directory does not exist")]
+    OutputDirectoryNotExists,
+    #[error("Unable to start logger!\n{0}")]
+    LogError(#[from] LogError),
+}
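Because the io-backed variants carry #[from], failures convert with the ? operator; a minimal sketch (the function and directory argument are illustrative, not part of the commit):

    fn prepare_log_dir(log_dir: &std::path::Path) -> Result<(), LogError> {
        // std::io::Error converts into LogError::CreateLogDirectoryError via #[from]
        std::fs::create_dir_all(log_dir)?;
        Ok(())
    }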
@@ -1,5 +1,4 @@
-use std::collections::HashMap;
+use itertools::Itertools;

 use kuchiki::{traits::*, NodeRef};

 use crate::errors::PaperoniError;

@@ -7,10 +6,6 @@ use crate::moz_readability::{MetaData, Readability};

 pub type ResourceInfo = (String, Option<String>);

-lazy_static! {
-    static ref ESC_SEQ_REGEX: regex::Regex = regex::Regex::new(r#"(&|<|>|'|")"#).unwrap();
-}
-
 pub struct Extractor {
     article: Option<NodeRef>,
     pub img_urls: Vec<ResourceInfo>,

@@ -37,6 +32,7 @@ impl Extractor {
         let template = r#"
 <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
     <head>
+        <link rel="stylesheet" href="stylesheet.css" type="text/css"></link>
     </head>
     <body>
     </body>

@@ -53,15 +49,19 @@ impl Extractor {
     /// Traverses the DOM tree of the content and retrieves the IMG URLs
     pub fn extract_img_urls(&mut self) {
         if let Some(content_ref) = &self.article {
-            for img_ref in content_ref.select("img").unwrap() {
-                img_ref.as_node().as_element().map(|img_elem| {
-                    img_elem.attributes.borrow().get("src").map(|img_url| {
-                        if !(img_url.is_empty() || img_url.starts_with("data:image")) {
-                            self.img_urls.push((img_url.to_string(), None))
-                        }
-                    })
-                });
-            }
+            self.img_urls = content_ref
+                .select("img")
+                .unwrap()
+                .filter_map(|img_ref| {
+                    let attrs = img_ref.attributes.borrow();
+                    attrs
+                        .get("src")
+                        .filter(|val| !(val.is_empty() || val.starts_with("data:image")))
+                        .map(ToString::to_string)
+                })
+                .unique()
+                .map(|val| (val, None))
+                .collect();
         }
     }
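The rewrite replaces imperative pushes with one iterator pipeline; the same shape over a plain Vec, to show where itertools' unique() now collapses duplicate src values (a sketch, not part of the commit):

    use itertools::Itertools;

    fn collect_img_srcs(srcs: Vec<&str>) -> Vec<(String, Option<String>)> {
        srcs.into_iter()
            .filter(|src| !(src.is_empty() || src.starts_with("data:image")))
            .map(ToString::to_string)
            .unique() // duplicates collapse here; the old loop kept them
            .map(|src| (src, None))
            .collect()
    }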
@@ -77,59 +77,6 @@ impl Extractor {
     }
 }

-/// Serializes a NodeRef to a string that is XHTML compatible
-/// The only DOM nodes serialized are Text and Element nodes
-pub fn serialize_to_xhtml<W: std::io::Write>(
-    node_ref: &NodeRef,
-    mut w: &mut W,
-) -> Result<(), PaperoniError> {
-    let mut escape_map = HashMap::new();
-    escape_map.insert("<", "&lt;");
-    escape_map.insert(">", "&gt;");
-    escape_map.insert("&", "&amp;");
-    escape_map.insert("\"", "&quot;");
-    escape_map.insert("'", "&#39;");
-    for edge in node_ref.traverse_inclusive() {
-        match edge {
-            kuchiki::iter::NodeEdge::Start(n) => match n.data() {
-                kuchiki::NodeData::Text(rc_text) => {
-                    let text = rc_text.borrow();
-                    let esc_text = ESC_SEQ_REGEX
-                        .replace_all(&text, |captures: &regex::Captures| escape_map[&captures[1]]);
-                    write!(&mut w, "{}", esc_text)?;
-                }
-                kuchiki::NodeData::Element(elem_data) => {
-                    let attrs = elem_data.attributes.borrow();
-                    let attrs_str = attrs
-                        .map
-                        .iter()
-                        .filter(|(k, _)| &k.local != "\"")
-                        .map(|(k, v)| {
-                            format!(
-                                "{}=\"{}\"",
-                                k.local,
-                                ESC_SEQ_REGEX
-                                    .replace_all(&v.value, |captures: &regex::Captures| {
-                                        escape_map[&captures[1]]
-                                    })
-                            )
-                        })
-                        .fold("".to_string(), |acc, val| acc + " " + &val);
-                    write!(&mut w, "<{}{}>", &elem_data.name.local, attrs_str)?;
-                }
-                _ => (),
-            },
-            kuchiki::iter::NodeEdge::End(n) => match n.data() {
-                kuchiki::NodeData::Element(elem_data) => {
-                    write!(&mut w, "</{}>", &elem_data.name.local)?;
-                }
-                _ => (),
-            },
-        }
-    }
-    Ok(())
-}
-
 #[cfg(test)]
 mod test {
     use super::*;
82 src/http.rs

@@ -1,14 +1,72 @@
 use async_std::io::prelude::*;
+use async_std::task;
 use async_std::{fs::File, stream};
 use futures::StreamExt;
 use indicatif::ProgressBar;
+use log::warn;
 use log::{debug, info};
 use url::Url;

+use crate::cli::AppConfig;
 use crate::errors::{ErrorKind, ImgError, PaperoniError};
 use crate::extractor::Extractor;
 type HTMLResource = (String, String);

+pub fn download(
+    app_config: &AppConfig,
+    bar: &ProgressBar,
+    partial_downloads: &mut Vec<PartialDownload>,
+    errors: &mut Vec<PaperoniError>,
+) -> Vec<Extractor> {
+    task::block_on(async {
+        let urls_iter = app_config.urls.iter().map(|url| fetch_html(url));
+        let mut responses = stream::from_iter(urls_iter).buffered(app_config.max_conn);
+        let mut articles = Vec::new();
+        while let Some(fetch_result) = responses.next().await {
+            match fetch_result {
+                Ok((url, html)) => {
+                    debug!("Extracting {}", &url);
+                    let mut extractor = Extractor::from_html(&html, &url);
+                    bar.set_message("Extracting...");
+                    match extractor.extract_content() {
+                        Ok(_) => {
+                            extractor.extract_img_urls();
+                            if let Err(img_errors) =
+                                download_images(&mut extractor, &Url::parse(&url).unwrap(), &bar)
+                                    .await
+                            {
+                                partial_downloads
+                                    .push(PartialDownload::new(&url, extractor.metadata().title()));
+                                warn!(
+                                    "{} image{} failed to download for {}",
+                                    img_errors.len(),
+                                    if img_errors.len() > 1 { "s" } else { "" },
+                                    url
+                                );
+                                for img_error in img_errors {
+                                    warn!(
+                                        "{}\n\t\tReason {}",
+                                        img_error.url().as_ref().unwrap(),
+                                        img_error
+                                    );
+                                }
+                            }
+                            articles.push(extractor);
+                        }
+                        Err(mut e) => {
+                            e.set_article_source(&url);
+                            errors.push(e);
+                        }
+                    }
+                }
+                Err(e) => errors.push(e),
+            }
+            bar.inc(1);
+        }
+        articles
+    })
+}
+
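A sketch of the call site this new function expects (main.rs switches to it in a later hunk in this commit); the two vectors are read afterwards for the summary output. Function name and the hidden bar are illustrative:

    fn run_downloads(app_config: &AppConfig) {
        let bar = ProgressBar::hidden(); // main.rs builds a styled bar instead
        let mut partial_downloads: Vec<PartialDownload> = Vec::new();
        let mut errors: Vec<PaperoniError> = Vec::new();
        let articles = download(app_config, &bar, &mut partial_downloads, &mut errors);
        println!(
            "{} extracted ({} with missing images), {} failed",
            articles.len(),
            partial_downloads.len(),
            errors.len()
        );
    }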
 pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
     let client = surf::Client::new();
     debug!("Fetching {}", url);

@@ -153,7 +211,11 @@ pub async fn download_images(
         })
         .enumerate()
         .map(|(img_idx, (url, req))| async move {
-            bar.set_message(format!("Downloading images [{}/{}]", img_idx + 1, img_count).as_str());
+            bar.set_message(format!(
+                "Downloading images [{}/{}]",
+                img_idx + 1,
+                img_count
+            ));
             match req.await {
                 Ok(mut img_response) => {
                     let process_response =

@@ -206,6 +268,20 @@ pub async fn download_images(
     }
 }

+pub struct PartialDownload {
+    pub link: String,
+    pub title: String,
+}
+
+impl PartialDownload {
+    pub fn new(link: &str, title: &str) -> Self {
+        Self {
+            link: link.into(),
+            title: title.into(),
+        }
+    }
+}
+
 /// Handles getting the extension from a given MIME subtype.
 fn map_mime_subtype_to_ext(subtype: &str) -> &str {
     if subtype == ("svg+xml") {

@@ -234,9 +310,9 @@ fn get_absolute_url(url: &str, request_url: &Url) -> String {
             .unwrap()
             .join(url)
             .unwrap()
-            .into_string()
+            .into()
     } else {
-        request_url.join(url).unwrap().into_string()
+        request_url.join(url).unwrap().into()
     }
 }
240 src/logs.rs

@@ -1,18 +1,21 @@
+use std::fs;
+
+use chrono::{DateTime, Local};
 use colored::*;
 use comfy_table::presets::UTF8_HORIZONTAL_BORDERS_ONLY;
 use comfy_table::{Cell, CellAlignment, ContentArrangement, Table};
-use directories::UserDirs;
-use flexi_logger::LogSpecBuilder;
+use flexi_logger::{FileSpec, LevelFilter};
 use log::error;

-use crate::{cli::AppConfig, errors::PaperoniError};
+use crate::errors::PaperoniError;

 pub fn display_summary(
     initial_article_count: usize,
     succesful_articles_table: Table,
-    partial_downloads_count: usize,
+    partial_downloads: Vec<PartialDownload>,
     errors: Vec<PaperoniError>,
 ) {
+    let partial_downloads_count = partial_downloads.len();
     let successfully_downloaded_count =
         initial_article_count - partial_downloads_count - errors.len();

@@ -30,6 +33,24 @@ pub fn display_summary(
     if successfully_downloaded_count > 0 {
         println!("{}", succesful_articles_table);
     }
+
+    if partial_downloads_count > 0 {
+        println!("\n{}", "Partially failed downloads".yellow().bold());
+        let mut table_partial = Table::new();
+        table_partial
+            .load_preset(UTF8_HORIZONTAL_BORDERS_ONLY)
+            .set_header(vec![
+                Cell::new("Link").set_alignment(CellAlignment::Center),
+                Cell::new("Title").set_alignment(CellAlignment::Center),
+            ])
+            .set_content_arrangement(ContentArrangement::Dynamic);
+
+        for partial in partial_downloads {
+            table_partial.add_row(vec![&partial.link, &partial.title]);
+        }
+        println!("{}", table_partial);
+    }
+
     if !errors.is_empty() {
         println!("\n{}", "Failed article downloads".bright_red().bold());
         let mut table_failed = Table::new();

@@ -55,76 +76,55 @@ pub fn display_summary(

 /// Returns a string summary of the total number of failed and successful article downloads
 fn short_summary(download_count: DownloadCount) -> String {
-    // TODO: Refactor this
     if download_count.total
         != download_count.successful + download_count.failed + download_count.partial
     {
         panic!("initial_count must be equal to the sum of failed and successful count")
     }
     let get_noun = |count: usize| if count == 1 { "article" } else { "articles" };
-    if download_count.successful == download_count.total && download_count.successful == 1 {
-        "Article downloaded successfully".green().to_string()
-    } else if download_count.total == download_count.failed && download_count.failed == 1 {
-        "Article failed to download".red().to_string()
-    } else if download_count.total == download_count.partial && download_count.partial == 1 {
-        "Article partially failed to download".yellow().to_string()
-    } else if download_count.successful == download_count.total {
-        "All articles downloaded successfully".green().to_string()
-    } else if download_count.failed == download_count.total {
-        "All articles failed to download".red().to_string()
-    } else if download_count.partial == download_count.total {
-        "All articles partially failed to download"
-            .yellow()
-            .to_string()
-    } else if download_count.partial == 0 {
-        format!(
-            "{} {} downloaded successfully, {} {} failed",
-            download_count.successful,
-            get_noun(download_count.successful),
-            download_count.failed,
-            get_noun(download_count.failed)
-        )
-        .yellow()
-        .to_string()
-    } else if download_count.successful == 0
-        && download_count.partial > 0
-        && download_count.failed > 0
-    {
-        format!(
-            "{} {} partially failed to download, {} {} failed",
-            download_count.partial,
-            get_noun(download_count.partial),
-            download_count.failed,
-            get_noun(download_count.failed)
-        )
-        .yellow()
-        .to_string()
-    } else if download_count.failed == 0
-        && download_count.successful > 0
-        && download_count.partial > 0
-    {
-        format!(
-            "{} {} downloaded successfully, {} {} partially failed to download",
-            download_count.successful,
-            get_noun(download_count.successful),
-            download_count.partial,
-            get_noun(download_count.partial)
-        )
-        .yellow()
+    let get_summary = |count, label, color: Color| {
+        if count == 0 {
+            return "".to_string();
+        };
+
+        {
+            if count == 1 && count == download_count.total {
+                "Article".to_string() + label
+            } else if count == download_count.total {
+                "All ".to_string() + get_noun(count) + label
+            } else {
+                count.to_string() + " " + get_noun(count) + label
+            }
+        }
+        .color(color)
         .to_string()
+    };
+
+    let mut summary = get_summary(
+        download_count.successful,
+        " downloaded successfully",
+        Color::BrightGreen,
+    );
+
+    let partial_summary = get_summary(
+        download_count.partial,
+        " partially failed to download",
+        Color::Yellow,
+    );
+
+    if !summary.is_empty() && !partial_summary.is_empty() {
+        summary = summary + ", " + &partial_summary;
     } else {
-        format!(
-            "{} {} downloaded successfully, {} {} partially failed to download, {} {} failed",
-            download_count.successful,
-            get_noun(download_count.successful),
-            download_count.partial,
-            get_noun(download_count.partial),
-            download_count.failed,
-            get_noun(download_count.failed)
-        )
-        .yellow()
-        .to_string()
+        summary = summary + &partial_summary;
     }
+
+    let failed_summary = get_summary(download_count.failed, " failed to download", Color::Red);
+    if !summary.is_empty() && !failed_summary.is_empty() {
+        summary = summary + ", " + &failed_summary;
+    } else {
+        summary = summary + &failed_summary;
+    }
+    summary
 }
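The refactor builds each colored fragment with one closure and then joins the non-empty fragments with ", "; an equivalent formulation of that joining step, shown on its own (a sketch, not the code above):

    fn join_fragments(fragments: &[String]) -> String {
        fragments
            .iter()
            .filter(|s| !s.is_empty())
            .cloned()
            .collect::<Vec<_>>()
            .join(", ")
    }

    // e.g. for DownloadCount::new(10, 8, 0, 2) the fragments are
    // "8 articles downloaded successfully" (bright green) and
    // "2 articles failed to download" (red), matching the updated tests below.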
 struct DownloadCount {

@@ -144,44 +144,43 @@ impl DownloadCount {
     }
 }

-pub fn init_logger(app_config: &AppConfig) {
+use crate::errors::LogError as Error;
+use crate::http::PartialDownload;
+
+pub fn init_logger(
+    log_level: LevelFilter,
+    start_time: &DateTime<Local>,
+    is_logging_to_file: bool,
+) -> Result<(), Error> {
+    use directories::UserDirs;
+    use flexi_logger::LogSpecBuilder;
+
     match UserDirs::new() {
         Some(user_dirs) => {
             let home_dir = user_dirs.home_dir();
             let paperoni_dir = home_dir.join(".paperoni");
             let log_dir = paperoni_dir.join("logs");

-            let log_spec = LogSpecBuilder::new()
-                .module("paperoni", app_config.log_level())
-                .build();
-            let formatted_timestamp = app_config.start_time().format("%Y-%m-%d_%H-%M-%S");
+            let log_spec = LogSpecBuilder::new().module("paperoni", log_level).build();
+            let formatted_timestamp = start_time.format("%Y-%m-%d_%H-%M-%S");
             let mut logger = flexi_logger::Logger::with(log_spec);

-            if app_config.is_logging_to_file() && (!paperoni_dir.is_dir() || !log_dir.is_dir()) {
-                match std::fs::create_dir_all(&log_dir) {
-                    Ok(_) => (),
-                    Err(e) => {
-                        eprintln!("Unable to create paperoni directories on home directory for logging purposes\n{}",e);
-                        std::process::exit(1);
-                    }
-                };
-            }
-
-            if app_config.is_logging_to_file() {
-                logger = logger
-                    .directory(log_dir)
-                    .discriminant(formatted_timestamp.to_string())
-                    .suppress_timestamp()
-                    .log_to_file();
-            }
-
-            match logger.start() {
-                Ok(_) => (),
-                Err(e) => eprintln!("Unable to start logger!\n{}", e),
+            if is_logging_to_file {
+                if !paperoni_dir.is_dir() || !log_dir.is_dir() {
+                    fs::create_dir_all(&log_dir)?;
+                }
+                logger = logger.log_to_file(
+                    FileSpec::default()
+                        .directory(log_dir)
+                        .discriminant(formatted_timestamp.to_string())
+                        .suppress_timestamp(),
+                );
             }
+            logger.start()?;
+            Ok(())
         }
-        None => eprintln!("Unable to get user directories for logging purposes"),
-    };
+        None => Err(Error::UserDirectoriesError),
+    }
 }
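The logger no longer reads AppConfig directly; a sketch of the new call shape, with names assumed in scope as in this file and illustrative values (the real wiring lives in main.rs/cli.rs):

    use chrono::Local;
    use flexi_logger::LevelFilter;

    fn start_logging() -> Result<(), LogError> {
        let start_time = Local::now();
        // true: write the log under ~/.paperoni/logs; false: use flexi_logger's default output.
        init_logger(LevelFilter::Debug, &start_time, true)
    }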
 #[cfg(test)]

@@ -192,7 +191,7 @@ mod tests {
     fn test_short_summary() {
         assert_eq!(
             short_summary(DownloadCount::new(1, 1, 0, 0)),
-            "Article downloaded successfully".green().to_string()
+            "Article downloaded successfully".bright_green().to_string()
         );
         assert_eq!(
             short_summary(DownloadCount::new(1, 0, 0, 1)),

@@ -200,7 +199,9 @@ mod tests {
         );
         assert_eq!(
             short_summary(DownloadCount::new(10, 10, 0, 0)),
-            "All articles downloaded successfully".green().to_string()
+            "All articles downloaded successfully"
+                .bright_green()
+                .to_string()
         );
         assert_eq!(
             short_summary(DownloadCount::new(10, 0, 0, 10)),

@@ -208,39 +209,52 @@ mod tests {
         );
         assert_eq!(
             short_summary(DownloadCount::new(10, 8, 0, 2)),
-            "8 articles downloaded successfully, 2 articles failed"
-                .yellow()
-                .to_string()
+            format!(
+                "{}, {}",
+                "8 articles downloaded successfully".bright_green(),
+                "2 articles failed to download".red()
+            )
         );
         assert_eq!(
             short_summary(DownloadCount::new(10, 1, 0, 9)),
-            "1 article downloaded successfully, 9 articles failed"
-                .yellow()
-                .to_string()
+            format!(
+                "{}, {}",
+                "1 article downloaded successfully".bright_green(),
+                "9 articles failed to download".red()
+            )
         );
         assert_eq!(
             short_summary(DownloadCount::new(7, 6, 0, 1)),
-            "6 articles downloaded successfully, 1 article failed"
-                .yellow()
-                .to_string()
+            format!(
+                "{}, {}",
+                "6 articles downloaded successfully".bright_green(),
+                "1 article failed to download".red()
+            )
        );
         assert_eq!(
             short_summary(DownloadCount::new(7, 4, 2, 1)),
-            "4 articles downloaded successfully, 2 articles partially failed to download, 1 article failed"
-                .yellow()
-                .to_string()
+            format!(
+                "{}, {}, {}",
+                "4 articles downloaded successfully".bright_green(),
+                "2 articles partially failed to download".yellow(),
+                "1 article failed to download".red()
+            )
         );
         assert_eq!(
             short_summary(DownloadCount::new(12, 6, 6, 0)),
-            "6 articles downloaded successfully, 6 articles partially failed to download"
-                .yellow()
-                .to_string()
+            format!(
+                "{}, {}",
+                "6 articles downloaded successfully".bright_green(),
+                "6 articles partially failed to download".yellow()
+            )
         );
         assert_eq!(
             short_summary(DownloadCount::new(5, 0, 4, 1)),
-            "4 articles partially failed to download, 1 article failed"
-                .yellow()
-                .to_string()
+            format!(
+                "{}, {}",
+                "4 articles partially failed to download".yellow(),
+                "1 article failed to download".red()
+            )
         );
         assert_eq!(
             short_summary(DownloadCount::new(4, 0, 4, 0)),
107 src/main.rs

@@ -1,14 +1,12 @@
 #[macro_use]
 extern crate lazy_static;

-use async_std::stream;
-use async_std::task;
+use std::process::exit;
 use comfy_table::presets::{UTF8_FULL, UTF8_HORIZONTAL_BORDERS_ONLY};
 use comfy_table::{ContentArrangement, Table};
-use futures::stream::StreamExt;
+use http::download;
 use indicatif::{ProgressBar, ProgressStyle};
-use log::{debug, warn};
-use url::Url;

 mod cli;
 mod epub;

@@ -22,78 +20,48 @@ mod moz_readability;

 use cli::AppConfig;
 use epub::generate_epubs;
-use extractor::Extractor;
-use http::{download_images, fetch_html};
 use logs::display_summary;

 fn main() {
-    let app_config = cli::cli_init();
+    let app_config = match cli::AppConfig::init_with_cli() {
+        Ok(app_config) => app_config,
+        Err(err) => {
+            eprintln!("{}", err);
+            exit(1);
+        }
+    };

-    if !app_config.urls().is_empty() {
-        download(app_config);
+    if !app_config.urls.is_empty() {
+        run(app_config);
     }
 }

-fn download(app_config: AppConfig) {
+fn run(app_config: AppConfig) {
     let mut errors = Vec::new();
-    let mut partial_download_count: usize = 0;
-    let bar = if app_config.can_disable_progress_bar() {
+    let mut partial_downloads = Vec::new();
+
+    if let Some(dir_name) = &app_config.output_directory {
+        let noun = if app_config.urls.len() > 1 {
+            "articles"
+        } else {
+            "article"
+        };
+        println!("Downloading {} to {}", noun, dir_name);
+    }
+
+    let bar = if app_config.can_disable_progress_bar {
         ProgressBar::hidden()
     } else {
-        let enabled_bar = ProgressBar::new(app_config.urls().len() as u64);
+        let enabled_bar = ProgressBar::new(app_config.urls.len() as u64);
         let style = ProgressStyle::default_bar().template(
             "{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} link {pos}/{len:7} {msg:.yellow/white}",
         );
         enabled_bar.set_style(style);
         enabled_bar.enable_steady_tick(500);
         enabled_bar
     };
-    let articles = task::block_on(async {
-        let urls_iter = app_config.urls().iter().map(|url| fetch_html(url));
-        let mut responses = stream::from_iter(urls_iter).buffered(app_config.max_conn());
-        let mut articles = Vec::new();
-        while let Some(fetch_result) = responses.next().await {
-            match fetch_result {
-                Ok((url, html)) => {
-                    debug!("Extracting {}", &url);
-                    let mut extractor = Extractor::from_html(&html, &url);
-                    bar.set_message("Extracting...");
-                    match extractor.extract_content() {
-                        Ok(_) => {
-                            extractor.extract_img_urls();
-                            if let Err(img_errors) =
-                                download_images(&mut extractor, &Url::parse(&url).unwrap(), &bar)
-                                    .await
-                            {
-                                partial_download_count += 1;
-                                warn!(
-                                    "{} image{} failed to download for {}",
-                                    img_errors.len(),
-                                    if img_errors.len() > 1 { "s" } else { "" },
-                                    url
-                                );
-                                for img_error in img_errors {
-                                    warn!(
-                                        "{}\n\t\tReason {}",
-                                        img_error.url().as_ref().unwrap(),
-                                        img_error
-                                    );
-                                }
-                            }
-                            articles.push(extractor);
-                        }
-                        Err(mut e) => {
-                            e.set_article_source(&url);
-                            errors.push(e);
-                        }
-                    }
-                }
-                Err(e) => errors.push(e),
-            }
-            bar.inc(1);
-        }
-        articles
-    });
+    let articles = download(&app_config, &bar, &mut partial_downloads, &mut errors);
     bar.finish_with_message("Downloaded articles");

     let mut succesful_articles_table = Table::new();

@@ -107,19 +75,24 @@ fn download(app_config: AppConfig) {
         errors.extend(gen_epub_errors);
     }
     };
-    let has_errors = !errors.is_empty();
+
+    let has_errors = !errors.is_empty() || !partial_downloads.is_empty();
     display_summary(
-        app_config.urls().len(),
+        app_config.urls.len(),
         succesful_articles_table,
-        partial_download_count,
+        partial_downloads,
         errors,
     );
-    if app_config.is_logging_to_file() {
+
+    if app_config.is_logging_to_file {
         println!(
             "Log written to paperoni_{}.log\n",
-            app_config.start_time().format("%Y-%m-%d_%H-%M-%S")
+            app_config.start_time.format("%Y-%m-%d_%H-%M-%S")
         );
+    } else if has_errors && !app_config.is_logging_to_file {
+        println!("\nRun paperoni with the --log-to-file flag to create a log file");
     }

     if has_errors {
         std::process::exit(1);
     }
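Condensed, the new control flow in main.rs reads as follows; this is a straight restatement of the hunks above, trimmed to the happy path:

    fn main() {
        let app_config = match cli::AppConfig::init_with_cli() {
            Ok(config) => config,
            Err(err) => {
                eprintln!("{}", err);
                std::process::exit(1);
            }
        };
        if !app_config.urls.is_empty() {
            run(app_config); // http::download -> generate_epubs -> display_summary
        }
    }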
@@ -659,10 +659,24 @@ impl Readability {
             .map(|node_ref| {
                 let node_attrs = node_ref.attributes.borrow();
                 let href = node_attrs.get("href").unwrap();
-                if href.trim() == "/" {
-                    document_uri.join("/").unwrap()
-                } else {
-                    Url::parse(href).unwrap()
+                match Url::parse(href) {
+                    Ok(url) => url,
+                    Err(e) => match e {
+                        url::ParseError::RelativeUrlWithoutBase => {
+                            match document_uri.join(href) {
+                                Ok(joined_url) => joined_url,
+                                Err(e) => panic!(
+                                    "{:} unable to parse url {:?} on element {}",
+                                    e, href, &node_ref.name.local
+                                ),
+                            }
+                        }
+                        e => panic!(
+                            "{:} unable to parse url {:?} on element {}",
+                            e, href, &node_ref.name.local
+                        ),
+                    },
                 }
             })
             .next()

@@ -1609,13 +1623,11 @@ impl Readability {
         // // class name "comment", etc), and turn divs into P tags where they have been
         // // used inappropriately (as in, where they contain no other block level elements.)
         let mut elements_to_score: Vec<NodeRef> = Vec::new();
-        let mut node = Some(
-            self.root_node
-                .select_first("html")
-                .unwrap()
-                .as_node()
-                .clone(),
-        );
+        let mut node = self
+            .root_node
+            .select_first("html")
+            .ok()
+            .map(|n| n.as_node().clone());

         while let Some(node_ref) = node {
             let node_elem = node_ref.as_element().unwrap();
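The first hunk above replaces the "/"-only special case with a parse-then-join fallback; isolated, the same logic looks like this (function name illustrative):

    use url::{ParseError, Url};

    fn resolve(href: &str, document_uri: &Url) -> Url {
        match Url::parse(href) {
            Ok(url) => url, // already absolute
            Err(ParseError::RelativeUrlWithoutBase) => document_uri
                .join(href) // resolve against the document's own URI
                .expect("relative href should join against the document URI"),
            Err(e) => panic!("unable to parse url {:?}: {}", href, e),
        }
    }

    // Url::parse("/feed.xml") fails with RelativeUrlWithoutBase, so it is joined:
    // resolve("/feed.xml", &Url::parse("https://example.com/posts/a.html").unwrap())
    //     == Url::parse("https://example.com/feed.xml").unwrap()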