commit
474d97c6bd
12 changed files with 1424 additions and 281 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,2 +1,3 @@
|
||||||
/target
|
/target
|
||||||
*.epub
|
*.epub
|
||||||
|
*.log
|
480
Cargo.lock
generated
480
Cargo.lock
generated
|
@ -126,12 +126,15 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-global-executor"
|
name = "async-global-executor"
|
||||||
version = "1.4.3"
|
version = "2.0.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "73079b49cd26b8fd5a15f68fc7707fc78698dc2a3d61430f2a7a9430230dfa04"
|
checksum = "9586ec52317f36de58453159d48351bc244bc24ced3effc1fce22f3d48664af6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"async-channel",
|
||||||
"async-executor",
|
"async-executor",
|
||||||
"async-io",
|
"async-io",
|
||||||
|
"async-mutex",
|
||||||
|
"blocking",
|
||||||
"futures-lite",
|
"futures-lite",
|
||||||
"num_cpus",
|
"num_cpus",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
@ -147,7 +150,7 @@ dependencies = [
|
||||||
"fastrand",
|
"fastrand",
|
||||||
"futures-lite",
|
"futures-lite",
|
||||||
"libc",
|
"libc",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"nb-connect",
|
"nb-connect",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"parking",
|
"parking",
|
||||||
|
@ -157,6 +160,15 @@ dependencies = [
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "async-lock"
|
||||||
|
version = "2.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e6a8ea61bf9947a1007c5cada31e647dbc77b103c679858150003ba697ea798b"
|
||||||
|
dependencies = [
|
||||||
|
"event-listener",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-mutex"
|
name = "async-mutex"
|
||||||
version = "1.4.0"
|
version = "1.4.0"
|
||||||
|
@ -168,14 +180,14 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-std"
|
name = "async-std"
|
||||||
version = "1.7.0"
|
version = "1.9.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a7e82538bc65a25dbdff70e4c5439d52f068048ab97cdea0acd73f131594caa1"
|
checksum = "d9f06685bad74e0570f5213741bea82158279a4103d988e57bfada11ad230341"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"async-channel",
|
||||||
"async-global-executor",
|
"async-global-executor",
|
||||||
"async-io",
|
"async-io",
|
||||||
"async-mutex",
|
"async-lock",
|
||||||
"blocking",
|
|
||||||
"crossbeam-utils",
|
"crossbeam-utils",
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
|
@ -183,11 +195,11 @@ dependencies = [
|
||||||
"futures-lite",
|
"futures-lite",
|
||||||
"gloo-timers",
|
"gloo-timers",
|
||||||
"kv-log-macro",
|
"kv-log-macro",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"memchr",
|
"memchr",
|
||||||
"num_cpus",
|
"num_cpus",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"pin-project-lite 0.1.11",
|
"pin-project-lite 0.2.4",
|
||||||
"pin-utils",
|
"pin-utils",
|
||||||
"slab",
|
"slab",
|
||||||
"wasm-bindgen-futures",
|
"wasm-bindgen-futures",
|
||||||
|
@ -394,6 +406,28 @@ dependencies = [
|
||||||
"vec_map",
|
"vec_map",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colored"
|
||||||
|
version = "2.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd"
|
||||||
|
dependencies = [
|
||||||
|
"atty",
|
||||||
|
"lazy_static",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "comfy-table"
|
||||||
|
version = "2.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "17b99e9022e080d384b58d8eaf5976b42a311ff7a9669f8200eb2453c0b2b81a"
|
||||||
|
dependencies = [
|
||||||
|
"crossterm",
|
||||||
|
"strum",
|
||||||
|
"strum_macros",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "concurrent-queue"
|
name = "concurrent-queue"
|
||||||
version = "1.2.2"
|
version = "1.2.2"
|
||||||
|
@ -403,6 +437,21 @@ dependencies = [
|
||||||
"cache-padded",
|
"cache-padded",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "console"
|
||||||
|
version = "0.14.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3993e6445baa160675931ec041a5e03ca84b9c6e32a056150d3aa2bdda0a1f45"
|
||||||
|
dependencies = [
|
||||||
|
"encode_unicode",
|
||||||
|
"lazy_static",
|
||||||
|
"libc",
|
||||||
|
"regex",
|
||||||
|
"terminal_size",
|
||||||
|
"unicode-width",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "const_fn"
|
name = "const_fn"
|
||||||
version = "0.4.3"
|
version = "0.4.3"
|
||||||
|
@ -453,6 +502,31 @@ dependencies = [
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossterm"
|
||||||
|
version = "0.19.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7c36c10130df424b2f3552fcc2ddcd9b28a27b1e54b358b45874f88d1ca6888c"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"crossterm_winapi",
|
||||||
|
"lazy_static",
|
||||||
|
"libc",
|
||||||
|
"mio",
|
||||||
|
"parking_lot",
|
||||||
|
"signal-hook",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossterm_winapi"
|
||||||
|
version = "0.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0da8964ace4d3e4a044fd027919b2237000b24315a37c916f61809f1ff2140b9"
|
||||||
|
dependencies = [
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crypto-mac"
|
name = "crypto-mac"
|
||||||
version = "0.10.0"
|
version = "0.10.0"
|
||||||
|
@ -490,6 +564,16 @@ dependencies = [
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ctor"
|
||||||
|
version = "0.1.16"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7fbaabec2c953050352311293be5c6aba8e141ba19d6811862b232d6fd020484"
|
||||||
|
dependencies = [
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ctr"
|
name = "ctr"
|
||||||
version = "0.6.0"
|
version = "0.6.0"
|
||||||
|
@ -530,6 +614,16 @@ dependencies = [
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dashmap"
|
||||||
|
version = "4.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if 1.0.0",
|
||||||
|
"num_cpus",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "data-encoding"
|
name = "data-encoding"
|
||||||
version = "2.3.1"
|
version = "2.3.1"
|
||||||
|
@ -556,6 +650,26 @@ dependencies = [
|
||||||
"generic-array",
|
"generic-array",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "directories"
|
||||||
|
version = "3.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e69600ff1703123957937708eb27f7a564e48885c537782722ed0ba3189ce1d7"
|
||||||
|
dependencies = [
|
||||||
|
"dirs-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dirs-sys"
|
||||||
|
version = "0.3.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "03d86534ed367a67548dc68113a0f5db55432fdfbb6e6f9d77704397d95d5780"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"redox_users",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "discard"
|
name = "discard"
|
||||||
version = "1.0.4"
|
version = "1.0.4"
|
||||||
|
@ -577,6 +691,12 @@ dependencies = [
|
||||||
"dtoa",
|
"dtoa",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encode_unicode"
|
||||||
|
version = "0.3.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "encoding_rs"
|
name = "encoding_rs"
|
||||||
version = "0.8.26"
|
version = "0.8.26"
|
||||||
|
@ -640,6 +760,22 @@ dependencies = [
|
||||||
"miniz_oxide 0.3.7",
|
"miniz_oxide 0.3.7",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "flexi_logger"
|
||||||
|
version = "0.17.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "33ab94b6ac8eb69f1496a6993f26f785b5fd6d99b7416023eb2a6175c0b242b1"
|
||||||
|
dependencies = [
|
||||||
|
"atty",
|
||||||
|
"chrono",
|
||||||
|
"glob",
|
||||||
|
"lazy_static",
|
||||||
|
"log 0.4.14",
|
||||||
|
"regex",
|
||||||
|
"thiserror",
|
||||||
|
"yansi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "flume"
|
name = "flume"
|
||||||
version = "0.9.2"
|
version = "0.9.2"
|
||||||
|
@ -685,9 +821,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures"
|
name = "futures"
|
||||||
version = "0.3.12"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "da9052a1a50244d8d5aa9bf55cbc2fb6f357c86cc52e46c62ed390a7180cf150"
|
checksum = "a9d5813545e459ad3ca1bff9915e9ad7f1a47dc6a91b627ce321d5863b7dd253"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
|
@ -700,9 +836,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-channel"
|
name = "futures-channel"
|
||||||
version = "0.3.12"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f2d31b7ec7efab6eefc7c57233bb10b847986139d88cc2f5a02a1ae6871a1846"
|
checksum = "ce79c6a52a299137a6013061e0cf0e688fce5d7f1bc60125f520912fdb29ec25"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-sink",
|
"futures-sink",
|
||||||
|
@ -710,15 +846,15 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-core"
|
name = "futures-core"
|
||||||
version = "0.3.12"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "79e5145dde8da7d1b3892dad07a9c98fc04bc39892b1ecc9692cf53e2b780a65"
|
checksum = "098cd1c6dda6ca01650f1a37a794245eb73181d0d4d4e955e2f3c37db7af1815"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-executor"
|
name = "futures-executor"
|
||||||
version = "0.3.12"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e9e59fdc009a4b3096bf94f740a0f2424c082521f20a9b08c5c07c48d90fd9b9"
|
checksum = "10f6cb7042eda00f0049b1d2080aa4b93442997ee507eb3828e8bd7577f94c9d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-task",
|
"futures-task",
|
||||||
|
@ -727,9 +863,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-io"
|
name = "futures-io"
|
||||||
version = "0.3.12"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "28be053525281ad8259d47e4de5de657b25e7bac113458555bb4b70bc6870500"
|
checksum = "365a1a1fb30ea1c03a830fdb2158f5236833ac81fa0ad12fe35b29cddc35cb04"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-lite"
|
name = "futures-lite"
|
||||||
|
@ -748,9 +884,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-macro"
|
name = "futures-macro"
|
||||||
version = "0.3.12"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c287d25add322d9f9abdcdc5927ca398917996600182178774032e9f8258fedd"
|
checksum = "668c6733a182cd7deb4f1de7ba3bf2120823835b3bcfbeacf7d2c4a773c1bb8b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro-hack",
|
"proc-macro-hack",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
|
@ -760,24 +896,21 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-sink"
|
name = "futures-sink"
|
||||||
version = "0.3.12"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "caf5c69029bda2e743fddd0582d1083951d65cc9539aebf8812f36c3491342d6"
|
checksum = "5c5629433c555de3d82861a7a4e3794a4c40040390907cfbfd7143a92a426c23"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-task"
|
name = "futures-task"
|
||||||
version = "0.3.12"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "13de07eb8ea81ae445aca7b69f5f7bf15d7bf4912d8ca37d6645c77ae8a58d86"
|
checksum = "ba7aa51095076f3ba6d9a1f702f74bd05ec65f555d70d2033d55ba8d69f581bc"
|
||||||
dependencies = [
|
|
||||||
"once_cell",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-util"
|
name = "futures-util"
|
||||||
version = "0.3.12"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "632a8cd0f2a4b3fdea1657f08bde063848c3bd00f9bbf6e256b8be78802e624b"
|
checksum = "3c144ad54d60f23927f0a6b6d816e4271278b64f005ad65e4e35291d2de9c025"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
|
@ -823,6 +956,17 @@ dependencies = [
|
||||||
"wasi 0.9.0+wasi-snapshot-preview1",
|
"wasi 0.9.0+wasi-snapshot-preview1",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "getrandom"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if 1.0.0",
|
||||||
|
"libc",
|
||||||
|
"wasi 0.10.0+wasi-snapshot-preview1",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ghash"
|
name = "ghash"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
|
@ -838,6 +982,12 @@ version = "0.23.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce"
|
checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "glob"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "gloo-timers"
|
name = "gloo-timers"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
|
@ -851,6 +1001,15 @@ dependencies = [
|
||||||
"web-sys",
|
"web-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heck"
|
||||||
|
version = "0.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-segmentation",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hermit-abi"
|
name = "hermit-abi"
|
||||||
version = "0.1.17"
|
version = "0.1.17"
|
||||||
|
@ -895,7 +1054,7 @@ version = "0.25.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b"
|
checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"mac",
|
"mac",
|
||||||
"markup5ever",
|
"markup5ever",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
|
@ -916,15 +1075,17 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "http-client"
|
name = "http-client"
|
||||||
version = "6.2.0"
|
version = "6.3.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "010092b71b94ee49293995625ce7a607778b8b4099c8088fa84fd66bd3e0f21c"
|
checksum = "5566ecc26bc6b04e773e680d66141fced78e091ad818e420d726c152b05a64ff"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-std",
|
"async-std",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
|
"cfg-if 1.0.0",
|
||||||
|
"dashmap",
|
||||||
"http-types",
|
"http-types",
|
||||||
"isahc",
|
"isahc",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -960,6 +1121,18 @@ dependencies = [
|
||||||
"unicode-normalization",
|
"unicode-normalization",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "indicatif"
|
||||||
|
version = "0.15.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7baab56125e25686df467fe470785512329883aab42696d661247aca2a2896e4"
|
||||||
|
dependencies = [
|
||||||
|
"console",
|
||||||
|
"lazy_static",
|
||||||
|
"number_prefix",
|
||||||
|
"regex",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "infer"
|
name = "infer"
|
||||||
version = "0.2.3"
|
version = "0.2.3"
|
||||||
|
@ -988,7 +1161,7 @@ dependencies = [
|
||||||
"flume",
|
"flume",
|
||||||
"futures-lite",
|
"futures-lite",
|
||||||
"http",
|
"http",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"slab",
|
"slab",
|
||||||
"sluice",
|
"sluice",
|
||||||
|
@ -1031,7 +1204,7 @@ version = "1.0.7"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f"
|
checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1042,9 +1215,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libc"
|
name = "libc"
|
||||||
version = "0.2.80"
|
version = "0.2.93"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614"
|
checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libnghttp2-sys"
|
name = "libnghttp2-sys"
|
||||||
|
@ -1083,16 +1256,17 @@ version = "0.3.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
|
checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "log"
|
name = "log"
|
||||||
version = "0.4.11"
|
version = "0.4.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
|
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if 0.1.10",
|
"cfg-if 1.0.0",
|
||||||
|
"value-bag",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1107,7 +1281,7 @@ version = "0.10.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab"
|
checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"phf",
|
"phf",
|
||||||
"phf_codegen",
|
"phf_codegen",
|
||||||
"serde",
|
"serde",
|
||||||
|
@ -1171,6 +1345,28 @@ dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mio"
|
||||||
|
version = "0.7.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"log 0.4.14",
|
||||||
|
"miow",
|
||||||
|
"ntapi",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "miow"
|
||||||
|
version = "0.3.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21"
|
||||||
|
dependencies = [
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mustache"
|
name = "mustache"
|
||||||
version = "0.9.0"
|
version = "0.9.0"
|
||||||
|
@ -1203,6 +1399,15 @@ version = "0.1.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
|
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ntapi"
|
||||||
|
version = "0.3.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44"
|
||||||
|
dependencies = [
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num-integer"
|
name = "num-integer"
|
||||||
version = "0.1.44"
|
version = "0.1.44"
|
||||||
|
@ -1232,6 +1437,12 @@ dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "number_prefix"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "object"
|
name = "object"
|
||||||
version = "0.22.0"
|
version = "0.22.0"
|
||||||
|
@ -1271,18 +1482,26 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "paperoni"
|
name = "paperoni"
|
||||||
version = "0.3.0-alpha1"
|
version = "0.4.0-alpha1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-std",
|
"async-std",
|
||||||
|
"chrono",
|
||||||
"clap",
|
"clap",
|
||||||
|
"colored",
|
||||||
|
"comfy-table",
|
||||||
|
"directories",
|
||||||
"epub-builder",
|
"epub-builder",
|
||||||
|
"flexi_logger",
|
||||||
"futures",
|
"futures",
|
||||||
"html5ever",
|
"html5ever",
|
||||||
|
"indicatif",
|
||||||
"kuchiki",
|
"kuchiki",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
|
"log 0.4.14",
|
||||||
"md5",
|
"md5",
|
||||||
"regex",
|
"regex",
|
||||||
"surf",
|
"surf",
|
||||||
|
"thiserror",
|
||||||
"url",
|
"url",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -1292,6 +1511,31 @@ version = "2.0.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72"
|
checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parking_lot"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb"
|
||||||
|
dependencies = [
|
||||||
|
"instant",
|
||||||
|
"lock_api",
|
||||||
|
"parking_lot_core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "parking_lot_core"
|
||||||
|
version = "0.8.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if 1.0.0",
|
||||||
|
"instant",
|
||||||
|
"libc",
|
||||||
|
"redox_syscall 0.2.6",
|
||||||
|
"smallvec",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "percent-encoding"
|
name = "percent-encoding"
|
||||||
version = "2.1.0"
|
version = "2.1.0"
|
||||||
|
@ -1404,7 +1648,7 @@ checksum = "a2a7bc6b2a29e632e45451c941832803a18cce6781db04de8a04696cdca8bde4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if 0.1.10",
|
"cfg-if 0.1.10",
|
||||||
"libc",
|
"libc",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"wepoll-sys",
|
"wepoll-sys",
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
@ -1480,7 +1724,7 @@ version = "0.7.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
|
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"getrandom",
|
"getrandom 0.1.15",
|
||||||
"libc",
|
"libc",
|
||||||
"rand_chacha",
|
"rand_chacha",
|
||||||
"rand_core 0.5.1",
|
"rand_core 0.5.1",
|
||||||
|
@ -1519,7 +1763,7 @@ version = "0.5.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
|
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"getrandom",
|
"getrandom 0.1.15",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -1556,22 +1800,40 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
|
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex"
|
name = "redox_syscall"
|
||||||
version = "1.4.2"
|
version = "0.2.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c"
|
checksum = "8270314b5ccceb518e7e578952f0b72b88222d02e8f77f5ecf7abbb673539041"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "redox_users"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64"
|
||||||
|
dependencies = [
|
||||||
|
"getrandom 0.2.2",
|
||||||
|
"redox_syscall 0.2.6",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.4.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "957056ecddbeba1b26965114e191d2e8589ce74db242b6ea25fc4062427a5c19"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick",
|
"aho-corasick",
|
||||||
"memchr",
|
"memchr",
|
||||||
"regex-syntax",
|
"regex-syntax",
|
||||||
"thread_local",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex-syntax"
|
name = "regex-syntax"
|
||||||
version = "0.6.21"
|
version = "0.6.23"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189"
|
checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "remove_dir_all"
|
name = "remove_dir_all"
|
||||||
|
@ -1629,7 +1891,7 @@ dependencies = [
|
||||||
"cssparser",
|
"cssparser",
|
||||||
"derive_more",
|
"derive_more",
|
||||||
"fxhash",
|
"fxhash",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"matches",
|
"matches",
|
||||||
"phf",
|
"phf",
|
||||||
"phf_codegen",
|
"phf_codegen",
|
||||||
|
@ -1738,6 +2000,26 @@ dependencies = [
|
||||||
"opaque-debug",
|
"opaque-debug",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "signal-hook"
|
||||||
|
version = "0.1.17"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7e31d442c16f047a671b5a71e2161d6e68814012b7f5379d269ebd915fac2729"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"mio",
|
||||||
|
"signal-hook-registry",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "signal-hook-registry"
|
||||||
|
version = "1.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "siphasher"
|
name = "siphasher"
|
||||||
version = "0.3.3"
|
version = "0.3.3"
|
||||||
|
@ -1763,9 +2045,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "smallvec"
|
name = "smallvec"
|
||||||
version = "1.5.0"
|
version = "1.6.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7acad6f34eb9e8a259d3283d1e8c1d34d7415943d4895f65cc73813c7396fc85"
|
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "socket2"
|
name = "socket2"
|
||||||
|
@ -1775,7 +2057,7 @@ checksum = "2c29947abdee2a218277abeca306f25789c938e500ea5a9d4b12a5a504466902"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if 1.0.0",
|
"cfg-if 1.0.0",
|
||||||
"libc",
|
"libc",
|
||||||
"redox_syscall",
|
"redox_syscall 0.1.57",
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -1883,6 +2165,24 @@ version = "0.8.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strum"
|
||||||
|
version = "0.20.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7318c509b5ba57f18533982607f24070a55d353e90d4cae30c467cdb2ad5ac5c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strum_macros"
|
||||||
|
version = "0.20.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ee8bc6b87a5112aeeab1f4a9f7ab634fe6cbefc4850006df31267f4cfb9e3149"
|
||||||
|
dependencies = [
|
||||||
|
"heck",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "subtle"
|
name = "subtle"
|
||||||
version = "2.3.0"
|
version = "2.3.0"
|
||||||
|
@ -1891,21 +2191,21 @@ checksum = "343f3f510c2915908f155e94f17220b19ccfacf2a64a2a5d8004f2c3e311e7fd"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "surf"
|
name = "surf"
|
||||||
version = "2.1.0"
|
version = "2.2.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7189c787d96fe18fef704950de76d590022d9d70858a4a201e1f07a0666882ea"
|
checksum = "2a154d33ca6b5e1fe6fd1c760e5a5cc1202425f6cca2e13229f16a69009f6328"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-std",
|
"async-std",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"cfg-if 0.1.10",
|
"cfg-if 1.0.0",
|
||||||
"encoding_rs",
|
"encoding_rs",
|
||||||
"futures-util",
|
"futures-util",
|
||||||
"http-client",
|
"http-client",
|
||||||
"http-types",
|
"http-types",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"mime_guess",
|
"mime_guess",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"pin-project-lite 0.1.11",
|
"pin-project-lite 0.2.4",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"web-sys",
|
"web-sys",
|
||||||
|
@ -1943,6 +2243,16 @@ dependencies = [
|
||||||
"utf-8",
|
"utf-8",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "terminal_size"
|
||||||
|
version = "0.1.16"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "86ca8ced750734db02076f44132d802af0b33b09942331f4459dde8636fd2406"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "textwrap"
|
name = "textwrap"
|
||||||
version = "0.11.0"
|
version = "0.11.0"
|
||||||
|
@ -1960,33 +2270,24 @@ checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "thiserror"
|
name = "thiserror"
|
||||||
version = "1.0.22"
|
version = "1.0.24"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0e9ae34b84616eedaaf1e9dd6026dbe00dcafa92aa0c8077cb69df1fcfe5e53e"
|
checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"thiserror-impl",
|
"thiserror-impl",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "thiserror-impl"
|
name = "thiserror-impl"
|
||||||
version = "1.0.22"
|
version = "1.0.24"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9ba20f23e85b10754cd195504aebf6a27e2e6cbe28c17778a0c930724628dd56"
|
checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "thread_local"
|
|
||||||
version = "1.0.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
|
|
||||||
dependencies = [
|
|
||||||
"lazy_static",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "time"
|
name = "time"
|
||||||
version = "0.1.44"
|
version = "0.1.44"
|
||||||
|
@ -2058,7 +2359,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b0987850db3733619253fe60e17cb59b82d37c7e6c0236bb81e4d6b87c879f27"
|
checksum = "b0987850db3733619253fe60e17cb59b82d37c7e6c0236bb81e4d6b87c879f27"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if 0.1.10",
|
"cfg-if 0.1.10",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"pin-project-lite 0.1.11",
|
"pin-project-lite 0.1.11",
|
||||||
"tracing-attributes",
|
"tracing-attributes",
|
||||||
"tracing-core",
|
"tracing-core",
|
||||||
|
@ -2127,6 +2428,12 @@ dependencies = [
|
||||||
"tinyvec",
|
"tinyvec",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-segmentation"
|
||||||
|
version = "1.7.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-width"
|
name = "unicode-width"
|
||||||
version = "0.1.8"
|
version = "0.1.8"
|
||||||
|
@ -2151,9 +2458,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "url"
|
name = "url"
|
||||||
version = "2.2.0"
|
version = "2.2.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5909f2b0817350449ed73e8bcd81c8c3c8d9a7a5d8acba4b27db277f1868976e"
|
checksum = "9ccd964113622c8e9322cfac19eb1004a07e636c545f325da085d5cdde6f1f8b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"form_urlencoded",
|
"form_urlencoded",
|
||||||
"idna",
|
"idna",
|
||||||
|
@ -2183,6 +2490,15 @@ dependencies = [
|
||||||
"rand 0.7.3",
|
"rand 0.7.3",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "value-bag"
|
||||||
|
version = "1.0.0-alpha.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6b676010e055c99033117c2343b33a40a30b91fecd6c49055ac9cd2d6c305ab1"
|
||||||
|
dependencies = [
|
||||||
|
"ctor",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "vcpkg"
|
name = "vcpkg"
|
||||||
version = "0.2.10"
|
version = "0.2.10"
|
||||||
|
@ -2243,7 +2559,7 @@ checksum = "f22b422e2a757c35a73774860af8e112bff612ce6cb604224e8e47641a9e4f68"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bumpalo",
|
"bumpalo",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"log 0.4.11",
|
"log 0.4.14",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn",
|
"syn",
|
||||||
|
@ -2332,6 +2648,12 @@ version = "0.4.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "yansi"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9fc79f4a1e39857fc00c3f662cbf2651c771f00e9c15fe2abc341806bd46bd71"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zip"
|
name = "zip"
|
||||||
version = "0.5.8"
|
version = "0.5.8"
|
||||||
|
|
21
Cargo.toml
21
Cargo.toml
|
@ -3,7 +3,7 @@ description = "A web article downloader"
|
||||||
homepage = "https://github.com/hipstermojo/paperoni"
|
homepage = "https://github.com/hipstermojo/paperoni"
|
||||||
repository = "https://github.com/hipstermojo/paperoni"
|
repository = "https://github.com/hipstermojo/paperoni"
|
||||||
name = "paperoni"
|
name = "paperoni"
|
||||||
version = "0.3.0-alpha1"
|
version = "0.4.0-alpha1"
|
||||||
authors = ["Kenneth Gitere <gitere81@gmail.com>"]
|
authors = ["Kenneth Gitere <gitere81@gmail.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
@ -12,14 +12,23 @@ readme = "README.md"
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
async-std = "1.7.0"
|
async-std = "1.9.0"
|
||||||
|
# atty = "0.2.14"
|
||||||
|
chrono = "0.4.19"
|
||||||
clap = "2.33.3"
|
clap = "2.33.3"
|
||||||
|
colored = "2.0.0"
|
||||||
|
comfy-table = "2.1.0"
|
||||||
|
directories = "3.0.2"
|
||||||
epub-builder = "0.4.8"
|
epub-builder = "0.4.8"
|
||||||
futures = "0.3.12"
|
flexi_logger = "0.17.1"
|
||||||
|
futures = "0.3.14"
|
||||||
html5ever = "0.25.1"
|
html5ever = "0.25.1"
|
||||||
|
indicatif = "0.15.0"
|
||||||
kuchiki = "0.8.1"
|
kuchiki = "0.8.1"
|
||||||
lazy_static = "1.4.0"
|
lazy_static = "1.4.0"
|
||||||
|
log = "0.4.14"
|
||||||
md5 = "0.7.0"
|
md5 = "0.7.0"
|
||||||
regex = "1.4.2"
|
regex = "1.4.5"
|
||||||
surf = "2.1.0"
|
surf = "2.2.0"
|
||||||
url = "2.2.0"
|
thiserror = "1.0.24"
|
||||||
|
url = "2.2.1"
|
||||||
|
|
44
README.md
44
README.md
|
@ -1,8 +1,10 @@
|
||||||
|
![crates.io](https://img.shields.io/crates/v/paperoni.svg)
|
||||||
|
|
||||||
<p align="center"><img src="./paperoni-dark.png"></p>
|
<p align="center"><img src="./paperoni-dark.png"></p>
|
||||||
|
|
||||||
<p align="center"><i>Salami not included</i></p>
|
<p align="center"><i>Salami not included</i></p>
|
||||||
|
|
||||||
Paperoni is a web article downloader written in Rust. The downloaded articles are then exported as EPUB files.
|
Paperoni is a CLI tool made in Rust for downloading web articles as EPUBs.
|
||||||
|
|
||||||
> This project is in an alpha release so it might crash when you use it. Please open an [issue on Github](https://github.com/hipstermojo/paperoni/issues/new) if it does crash.
|
> This project is in an alpha release so it might crash when you use it. Please open an [issue on Github](https://github.com/hipstermojo/paperoni/issues/new) if it does crash.
|
||||||
|
|
||||||
|
@ -17,7 +19,7 @@ Check the [releases](https://github.com/hipstermojo/paperoni/releases) page for
|
||||||
Paperoni is published on [crates.io](https://crates.io). If you have [cargo](https://github.com/rust-lang/cargo) installed, then run:
|
Paperoni is published on [crates.io](https://crates.io). If you have [cargo](https://github.com/rust-lang/cargo) installed, then run:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
cargo install paperoni --version 0.3.0-alpha1
|
cargo install paperoni --version 0.4.0-alpha1
|
||||||
```
|
```
|
||||||
|
|
||||||
_Paperoni is still in alpha so the `version` flag has to be passed._
|
_Paperoni is still in alpha so the `version` flag has to be passed._
|
||||||
|
@ -37,6 +39,27 @@ cargo run -- # pass your url here
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
|
```
|
||||||
|
USAGE:
|
||||||
|
paperoni [OPTIONS] [urls]...
|
||||||
|
|
||||||
|
OPTIONS:
|
||||||
|
-f, --file <file> Input file containing links
|
||||||
|
-h, --help Prints help information
|
||||||
|
--log-to-file Enables logging of events to a file located in .paperoni/logs with a default log level
|
||||||
|
of debug. Use -v to specify the logging level
|
||||||
|
--max_conn <max_conn> The maximum number of concurrent HTTP connections when downloading articles. Default is
|
||||||
|
8
|
||||||
|
--merge <output_name> Merge multiple articles into a single epub
|
||||||
|
-V, --version Prints version information
|
||||||
|
-v Enables logging of events and set the verbosity level. Use --help to read on its usage
|
||||||
|
|
||||||
|
ARGS:
|
||||||
|
<urls>... Urls of web articles
|
||||||
|
```
|
||||||
|
|
||||||
|
To download a single article pass in its URL
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
paperoni https://en.wikipedia.org/wiki/Pepperoni
|
paperoni https://en.wikipedia.org/wiki/Pepperoni
|
||||||
```
|
```
|
||||||
|
@ -68,10 +91,23 @@ into a single epub using the `merge` flag and specifying the output file.
|
||||||
paperoni -f links.txt --merge out.epub
|
paperoni -f links.txt --merge out.epub
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Logging events
|
||||||
|
|
||||||
|
Logging is disabled by default. It can be activated by using either the `-v` flag or the `--log-to-file` flag. If the `--log-to-file` flag is passed, the logs are sent to a file in the default Paperoni directory `.paperoni/logs`, which is in your home directory. The `-v` flag configures the verbosity levels such that:
|
||||||
|
|
||||||
|
```
|
||||||
|
-v Logs only the error level
|
||||||
|
-vv Logs only the warn level
|
||||||
|
-vvv Logs only the info level
|
||||||
|
-vvvv Logs only the debug level
|
||||||
|
```
|
||||||
|
|
||||||
|
If only the `-v` flag is passed, the progress bars are disabled. If both `-v` and `--log-to-file` are passed then the progress bars will still be shown.
|
||||||
|
|
||||||
## How it works
|
## How it works
|
||||||
|
|
||||||
The URL passed to Paperoni is fetched and the returned HTML response is passed to the extractor.
|
The URL passed to Paperoni is fetched and the returned HTML response is passed to the extractor.
|
||||||
This extractor retrieves a possible article using a port of the [Mozilla Readability algorithm](https://github.com/mozilla/readability). This article is then saved in an EPUB.
|
This extractor retrieves a possible article using a [custom port](https://github.com/hipstermojo/paperoni/blob/master/src/moz_readability/mod.rs) of the [Mozilla Readability algorithm](https://github.com/mozilla/readability). This article is then saved in an EPUB.
|
||||||
|
|
||||||
> The port of the algorithm is still unstable as well so it is not fully compatible with all the websites that can be extracted using Readability.
|
> The port of the algorithm is still unstable as well so it is not fully compatible with all the websites that can be extracted using Readability.
|
||||||
|
|
||||||
|
@ -82,3 +118,5 @@ This program is still in alpha so a number of things won't work:
|
||||||
- Websites that only run with JavaScript cannot be extracted.
|
- Websites that only run with JavaScript cannot be extracted.
|
||||||
- Website articles that cannot be extracted by Readability cannot be extracted by Paperoni either.
|
- Website articles that cannot be extracted by Readability cannot be extracted by Paperoni either.
|
||||||
- Code snippets on Medium articles that are lazy loaded will not appear in the EPUB.
|
- Code snippets on Medium articles that are lazy loaded will not appear in the EPUB.
|
||||||
|
|
||||||
|
There are also web pages it won't work on in general such as Twitter and Reddit threads.
|
||||||
|
|
105
src/cli.rs
105
src/cli.rs
|
@ -1,6 +1,10 @@
|
||||||
use std::{fs::File, io::Read};
|
use std::{fs::File, io::Read, path::Path};
|
||||||
|
|
||||||
|
use chrono::{DateTime, Local};
|
||||||
use clap::{App, AppSettings, Arg};
|
use clap::{App, AppSettings, Arg};
|
||||||
|
use flexi_logger::LevelFilter as LogLevel;
|
||||||
|
|
||||||
|
use crate::logs::init_logger;
|
||||||
|
|
||||||
pub fn cli_init() -> AppConfig {
|
pub fn cli_init() -> AppConfig {
|
||||||
let app = App::new("paperoni")
|
let app = App::new("paperoni")
|
||||||
|
@ -8,12 +12,9 @@ pub fn cli_init() -> AppConfig {
|
||||||
AppSettings::ArgRequiredElseHelp,
|
AppSettings::ArgRequiredElseHelp,
|
||||||
AppSettings::UnifiedHelpMessage,
|
AppSettings::UnifiedHelpMessage,
|
||||||
])
|
])
|
||||||
.version("0.3.0-alpha1")
|
.version(clap::crate_version!())
|
||||||
.about(
|
.about(
|
||||||
"
|
"Paperoni is a CLI tool made in Rust for downloading web articles as EPUBs",
|
||||||
Paperoni is an article downloader.
|
|
||||||
It takes a url and downloads the article content from it and saves it to an epub.
|
|
||||||
",
|
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
Arg::with_name("urls")
|
Arg::with_name("urls")
|
||||||
|
@ -38,8 +39,29 @@ It takes a url and downloads the article content from it and saves it to an epub
|
||||||
.long("max_conn")
|
.long("max_conn")
|
||||||
.help("The maximum number of concurrent HTTP connections when downloading articles. Default is 8")
|
.help("The maximum number of concurrent HTTP connections when downloading articles. Default is 8")
|
||||||
.long_help("The maximum number of concurrent HTTP connections when downloading articles. Default is 8.\nNOTE: It is advised to use as few connections as needed i.e between 1 and 50. Using more connections can end up overloading your network card with too many concurrent requests.")
|
.long_help("The maximum number of concurrent HTTP connections when downloading articles. Default is 8.\nNOTE: It is advised to use as few connections as needed i.e between 1 and 50. Using more connections can end up overloading your network card with too many concurrent requests.")
|
||||||
.takes_value(true));
|
.takes_value(true))
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("verbosity")
|
||||||
|
.short("v")
|
||||||
|
.multiple(true)
|
||||||
|
.help("Enables logging of events and set the verbosity level. Use --help to read on its usage")
|
||||||
|
.long_help(
|
||||||
|
"This takes upto 4 levels of verbosity in the following order.
|
||||||
|
- Error (-v)
|
||||||
|
- Warn (-vv)
|
||||||
|
- Info (-vvv)
|
||||||
|
- Debug (-vvvv)
|
||||||
|
When this flag is passed, it disables the progress bars and logs to stderr.
|
||||||
|
If you would like to send the logs to a file (and enable progress bars), pass the log-to-file flag."
|
||||||
|
)
|
||||||
|
.takes_value(false))
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("log-to-file")
|
||||||
|
.long("log-to-file")
|
||||||
|
.help("Enables logging of events to a file located in .paperoni/logs with a default log level of debug. Use -v to specify the logging level")
|
||||||
|
.takes_value(false));
|
||||||
let arg_matches = app.get_matches();
|
let arg_matches = app.get_matches();
|
||||||
|
|
||||||
let mut urls: Vec<String> = match arg_matches.value_of("file") {
|
let mut urls: Vec<String> = match arg_matches.value_of("file") {
|
||||||
Some(file_name) => {
|
Some(file_name) => {
|
||||||
if let Ok(mut file) = File::open(file_name) {
|
if let Ok(mut file) = File::open(file_name) {
|
||||||
|
@ -76,14 +98,51 @@ It takes a url and downloads the article content from it and saves it to an epub
|
||||||
|
|
||||||
let mut app_config = AppConfig::new(max_conn);
|
let mut app_config = AppConfig::new(max_conn);
|
||||||
app_config.set_urls(urls);
|
app_config.set_urls(urls);
|
||||||
|
|
||||||
if let Some(name) = arg_matches.value_of("output_name") {
|
if let Some(name) = arg_matches.value_of("output_name") {
|
||||||
let file_name = if name.ends_with(".epub") && name.len() > 5 {
|
let file_path = Path::new(name);
|
||||||
|
if file_path.is_dir() {
|
||||||
|
eprintln!("{:?} is a directory", name);
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
let file_name = if file_path.extension().is_some() {
|
||||||
name.to_owned()
|
name.to_owned()
|
||||||
} else {
|
} else {
|
||||||
name.to_owned() + ".epub"
|
name.to_owned() + ".epub"
|
||||||
};
|
};
|
||||||
app_config.set_merged(file_name);
|
|
||||||
|
match std::fs::File::create(&file_name) {
|
||||||
|
Ok(_) => (),
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("Unable to create file {:?}\n{}", file_path, e);
|
||||||
|
std::process::exit(1)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
app_config.merged = Some(file_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
if arg_matches.is_present("verbosity") {
|
||||||
|
if !arg_matches.is_present("log-to-file") {
|
||||||
|
app_config.can_disable_progress_bar = true;
|
||||||
|
}
|
||||||
|
let log_levels: [LogLevel; 5] = [
|
||||||
|
LogLevel::Off,
|
||||||
|
LogLevel::Error,
|
||||||
|
LogLevel::Warn,
|
||||||
|
LogLevel::Info,
|
||||||
|
LogLevel::Debug,
|
||||||
|
];
|
||||||
|
let level = arg_matches.occurrences_of("verbosity").clamp(0, 4) as usize;
|
||||||
|
app_config.log_level = log_levels[level];
|
||||||
|
}
|
||||||
|
if arg_matches.is_present("log-to-file") {
|
||||||
|
app_config.log_level = LogLevel::Debug;
|
||||||
|
app_config.is_logging_to_file = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
init_logger(&app_config);
|
||||||
|
|
||||||
app_config
|
app_config
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,6 +150,10 @@ pub struct AppConfig {
|
||||||
urls: Vec<String>,
|
urls: Vec<String>,
|
||||||
max_conn: usize,
|
max_conn: usize,
|
||||||
merged: Option<String>,
|
merged: Option<String>,
|
||||||
|
log_level: LogLevel,
|
||||||
|
can_disable_progress_bar: bool,
|
||||||
|
start_time: DateTime<Local>,
|
||||||
|
is_logging_to_file: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AppConfig {
|
impl AppConfig {
|
||||||
|
@ -99,6 +162,10 @@ impl AppConfig {
|
||||||
urls: vec![],
|
urls: vec![],
|
||||||
max_conn,
|
max_conn,
|
||||||
merged: None,
|
merged: None,
|
||||||
|
log_level: LogLevel::Off,
|
||||||
|
can_disable_progress_bar: false,
|
||||||
|
start_time: Local::now(),
|
||||||
|
is_logging_to_file: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,10 +173,6 @@ impl AppConfig {
|
||||||
self.urls.extend(urls);
|
self.urls.extend(urls);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_merged(&mut self, name: String) {
|
|
||||||
self.merged = Some(name);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn urls(&self) -> &Vec<String> {
|
pub fn urls(&self) -> &Vec<String> {
|
||||||
&self.urls
|
&self.urls
|
||||||
}
|
}
|
||||||
|
@ -120,4 +183,20 @@ impl AppConfig {
|
||||||
pub fn merged(&self) -> Option<&String> {
|
pub fn merged(&self) -> Option<&String> {
|
||||||
self.merged.as_ref()
|
self.merged.as_ref()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn log_level(&self) -> LogLevel {
|
||||||
|
self.log_level
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn can_disable_progress_bar(&self) -> bool {
|
||||||
|
self.can_disable_progress_bar
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn start_time(&self) -> &DateTime<Local> {
|
||||||
|
&self.start_time
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_logging_to_file(&self) -> bool {
|
||||||
|
self.is_logging_to_file
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
199
src/epub.rs
199
src/epub.rs
|
@ -1,32 +1,80 @@
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
|
||||||
|
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
|
||||||
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
|
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
|
||||||
|
use indicatif::{ProgressBar, ProgressStyle};
|
||||||
|
use log::{debug, info};
|
||||||
|
|
||||||
use crate::extractor::{self, Extractor};
|
use crate::{
|
||||||
|
cli::AppConfig,
|
||||||
|
errors::PaperoniError,
|
||||||
|
extractor::{self, Extractor},
|
||||||
|
};
|
||||||
|
|
||||||
pub fn generate_epubs(articles: Vec<Extractor>, merged: Option<&String>) {
|
pub fn generate_epubs(
|
||||||
match merged {
|
articles: Vec<Extractor>,
|
||||||
|
app_config: &AppConfig,
|
||||||
|
successful_articles_table: &mut Table,
|
||||||
|
) -> Result<(), Vec<PaperoniError>> {
|
||||||
|
let bar = if app_config.can_disable_progress_bar() {
|
||||||
|
ProgressBar::hidden()
|
||||||
|
} else {
|
||||||
|
let enabled_bar = ProgressBar::new(articles.len() as u64);
|
||||||
|
let style = ProgressStyle::default_bar().template(
|
||||||
|
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} epub {pos}/{len:7} {msg:.green}",
|
||||||
|
);
|
||||||
|
enabled_bar.set_style(style);
|
||||||
|
if !articles.is_empty() {
|
||||||
|
enabled_bar.set_message("Generating epubs");
|
||||||
|
}
|
||||||
|
enabled_bar
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut errors: Vec<PaperoniError> = Vec::new();
|
||||||
|
|
||||||
|
match app_config.merged() {
|
||||||
Some(name) => {
|
Some(name) => {
|
||||||
let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
successful_articles_table.set_header(vec![Cell::new("Table of Contents")
|
||||||
|
.add_attribute(Attribute::Bold)
|
||||||
|
.set_alignment(CellAlignment::Center)
|
||||||
|
.fg(Color::Green)]);
|
||||||
|
|
||||||
|
let mut epub = match EpubBuilder::new(match ZipLibrary::new() {
|
||||||
|
Ok(zip_library) => zip_library,
|
||||||
|
Err(err) => {
|
||||||
|
let mut paperoni_err: PaperoniError = err.into();
|
||||||
|
paperoni_err.set_article_source(name);
|
||||||
|
errors.push(paperoni_err);
|
||||||
|
return Err(errors);
|
||||||
|
}
|
||||||
|
}) {
|
||||||
|
Ok(epub) => epub,
|
||||||
|
Err(err) => {
|
||||||
|
let mut paperoni_err: PaperoniError = err.into();
|
||||||
|
paperoni_err.set_article_source(name);
|
||||||
|
errors.push(paperoni_err);
|
||||||
|
return Err(errors);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
debug!("Creating {:?}", name);
|
||||||
epub.inline_toc();
|
epub.inline_toc();
|
||||||
epub = articles
|
articles
|
||||||
.iter()
|
.iter()
|
||||||
.enumerate()
|
.enumerate()
|
||||||
.fold(epub, |mut epub, (idx, article)| {
|
.fold(&mut epub, |epub, (idx, article)| {
|
||||||
|
let mut article_result = || -> Result<(), PaperoniError> {
|
||||||
let mut html_buf = Vec::new();
|
let mut html_buf = Vec::new();
|
||||||
extractor::serialize_to_xhtml(article.article().unwrap(), &mut html_buf)
|
extractor::serialize_to_xhtml(article.article(), &mut html_buf)?;
|
||||||
.expect("Unable to serialize to xhtml");
|
let html_str = std::str::from_utf8(&html_buf)?;
|
||||||
let html_str = std::str::from_utf8(&html_buf).unwrap();
|
epub.metadata("title", replace_metadata_value(name))?;
|
||||||
epub.metadata("title", replace_metadata_value(name))
|
|
||||||
.unwrap();
|
|
||||||
let section_name = article.metadata().title();
|
let section_name = article.metadata().title();
|
||||||
epub.add_content(
|
epub.add_content(
|
||||||
EpubContent::new(format!("article_{}.xhtml", idx), html_str.as_bytes())
|
EpubContent::new(format!("article_{}.xhtml", idx), html_str.as_bytes())
|
||||||
.title(replace_metadata_value(section_name)),
|
.title(replace_metadata_value(section_name)),
|
||||||
)
|
)?;
|
||||||
.unwrap();
|
info!("Adding images for {:?}", name);
|
||||||
|
|
||||||
article.img_urls.iter().for_each(|img| {
|
article.img_urls.iter().for_each(|img| {
|
||||||
|
// TODO: Add error handling and return errors as a vec
|
||||||
let mut file_path = std::env::temp_dir();
|
let mut file_path = std::env::temp_dir();
|
||||||
file_path.push(&img.0);
|
file_path.push(&img.0);
|
||||||
|
|
||||||
|
@ -38,15 +86,54 @@ pub fn generate_epubs(articles: Vec<Extractor>, merged: Option<&String>) {
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
});
|
});
|
||||||
|
info!("Added images for {:?}", name);
|
||||||
|
Ok(())
|
||||||
|
};
|
||||||
|
if let Err(mut error) = article_result() {
|
||||||
|
error.set_article_source(&article.url);
|
||||||
|
errors.push(error);
|
||||||
|
}
|
||||||
|
bar.inc(1);
|
||||||
|
successful_articles_table.add_row(vec![article.metadata().title()]);
|
||||||
epub
|
epub
|
||||||
});
|
});
|
||||||
|
let appendix = generate_appendix(articles.iter().collect());
|
||||||
|
if let Err(err) = epub.add_content(
|
||||||
|
EpubContent::new("appendix.xhtml", appendix.as_bytes())
|
||||||
|
.title(replace_metadata_value("Article Sources")),
|
||||||
|
) {
|
||||||
|
let mut paperoni_err: PaperoniError = err.into();
|
||||||
|
paperoni_err.set_article_source(name);
|
||||||
|
errors.push(paperoni_err);
|
||||||
|
return Err(errors);
|
||||||
|
}
|
||||||
|
|
||||||
let mut out_file = File::create(&name).unwrap();
|
let mut out_file = File::create(&name).unwrap();
|
||||||
epub.generate(&mut out_file).unwrap();
|
match epub.generate(&mut out_file) {
|
||||||
|
Ok(_) => (),
|
||||||
|
Err(err) => {
|
||||||
|
let mut paperoni_err: PaperoniError = err.into();
|
||||||
|
paperoni_err.set_article_source(name);
|
||||||
|
errors.push(paperoni_err);
|
||||||
|
return Err(errors);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bar.finish_with_message("Generated epub\n");
|
||||||
|
debug!("Created {:?}", name);
|
||||||
println!("Created {:?}", name);
|
println!("Created {:?}", name);
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
for article in articles {
|
successful_articles_table
|
||||||
let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
.set_header(vec![Cell::new("Downloaded articles")
|
||||||
|
.add_attribute(Attribute::Bold)
|
||||||
|
.set_alignment(CellAlignment::Center)
|
||||||
|
.fg(Color::Green)])
|
||||||
|
.set_content_arrangement(ContentArrangement::Dynamic);
|
||||||
|
|
||||||
|
for article in &articles {
|
||||||
|
let mut result = || -> Result<(), PaperoniError> {
|
||||||
|
let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;
|
||||||
let file_name = format!(
|
let file_name = format!(
|
||||||
"{}.epub",
|
"{}.epub",
|
||||||
article
|
article
|
||||||
|
@ -55,31 +142,54 @@ pub fn generate_epubs(articles: Vec<Extractor>, merged: Option<&String>) {
|
||||||
.replace("/", " ")
|
.replace("/", " ")
|
||||||
.replace("\\", " ")
|
.replace("\\", " ")
|
||||||
);
|
);
|
||||||
|
debug!("Creating {:?}", file_name);
|
||||||
let mut out_file = File::create(&file_name).unwrap();
|
let mut out_file = File::create(&file_name).unwrap();
|
||||||
let mut html_buf = Vec::new();
|
let mut html_buf = Vec::new();
|
||||||
extractor::serialize_to_xhtml(article.article().unwrap(), &mut html_buf)
|
extractor::serialize_to_xhtml(article.article(), &mut html_buf)
|
||||||
.expect("Unable to serialize to xhtml");
|
.expect("Unable to serialize to xhtml");
|
||||||
let html_str = std::str::from_utf8(&html_buf).unwrap();
|
let html_str = std::str::from_utf8(&html_buf).unwrap();
|
||||||
if let Some(author) = article.metadata().byline() {
|
if let Some(author) = article.metadata().byline() {
|
||||||
epub.metadata("author", replace_metadata_value(author))
|
epub.metadata("author", replace_metadata_value(author))?;
|
||||||
.unwrap();
|
|
||||||
}
|
}
|
||||||
epub.metadata("title", replace_metadata_value(article.metadata().title()))
|
epub.metadata("title", replace_metadata_value(article.metadata().title()))?;
|
||||||
.unwrap();
|
epub.add_content(EpubContent::new("index.xhtml", html_str.as_bytes()))?;
|
||||||
epub.add_content(EpubContent::new("index.xhtml", html_str.as_bytes()))
|
for img in &article.img_urls {
|
||||||
.unwrap();
|
|
||||||
for img in article.img_urls {
|
|
||||||
let mut file_path = std::env::temp_dir();
|
let mut file_path = std::env::temp_dir();
|
||||||
file_path.push(&img.0);
|
file_path.push(&img.0);
|
||||||
|
|
||||||
let img_buf = File::open(&file_path).expect("Can't read file");
|
let img_buf = File::open(&file_path).expect("Can't read file");
|
||||||
epub.add_resource(file_path.file_name().unwrap(), img_buf, img.1.unwrap())
|
epub.add_resource(
|
||||||
.unwrap();
|
file_path.file_name().unwrap(),
|
||||||
|
img_buf,
|
||||||
|
img.1.as_ref().unwrap(),
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
epub.generate(&mut out_file).unwrap();
|
let appendix = generate_appendix(vec![&article]);
|
||||||
println!("Created {:?}", file_name);
|
epub.add_content(
|
||||||
|
EpubContent::new("appendix.xhtml", appendix.as_bytes())
|
||||||
|
.title(replace_metadata_value("Article Source")),
|
||||||
|
)?;
|
||||||
|
epub.generate(&mut out_file)?;
|
||||||
|
bar.inc(1);
|
||||||
|
|
||||||
|
successful_articles_table.add_row(vec![article.metadata().title()]);
|
||||||
|
|
||||||
|
debug!("Created {:?}", file_name);
|
||||||
|
Ok(())
|
||||||
|
};
|
||||||
|
if let Err(mut error) = result() {
|
||||||
|
error.set_article_source(&article.url);
|
||||||
|
errors.push(error);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
bar.finish_with_message("Generated epubs\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if errors.is_empty() {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(errors)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,6 +201,37 @@ fn replace_metadata_value(value: &str) -> String {
|
||||||
.replace(">", ">")
|
.replace(">", ">")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//TODO: The type signature of the argument should change as it requires that merged articles create an entirely new Vec of references
|
||||||
|
fn generate_appendix(articles: Vec<&Extractor>) -> String {
|
||||||
|
let link_tags: String = articles
|
||||||
|
.iter()
|
||||||
|
.map(|article| {
|
||||||
|
let article_name = if !article.metadata().title().is_empty() {
|
||||||
|
article.metadata().title()
|
||||||
|
} else {
|
||||||
|
&article.url
|
||||||
|
};
|
||||||
|
format!(
|
||||||
|
"<a href=\"{}\">{}</a><br></br>",
|
||||||
|
replace_metadata_value(&article.url),
|
||||||
|
replace_metadata_value(article_name)
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
let template = format!(
|
||||||
|
r#"<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
|
||||||
|
<head>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h2>Appendix</h2><h3>Article sources</h3>
|
||||||
|
{}
|
||||||
|
</body>
|
||||||
|
</html>"#,
|
||||||
|
link_tags
|
||||||
|
);
|
||||||
|
template
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::replace_metadata_value;
|
use super::replace_metadata_value;
|
||||||
|
|
126
src/errors.rs
Normal file
126
src/errors.rs
Normal file
|
@ -0,0 +1,126 @@
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
#[derive(Error, Debug)]
|
||||||
|
pub enum ErrorKind {
|
||||||
|
#[error("[EpubError]: {0}")]
|
||||||
|
EpubError(String),
|
||||||
|
#[error("[HTTPError]: {0}")]
|
||||||
|
HTTPError(String),
|
||||||
|
#[error("[IOError]: {0}")]
|
||||||
|
IOError(String),
|
||||||
|
#[error("[UTF8Error]: {0}")]
|
||||||
|
UTF8Error(String),
|
||||||
|
#[error("[ReadabilityError]: {0}")]
|
||||||
|
ReadabilityError(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Error, Debug)]
|
||||||
|
#[error("{kind}")]
|
||||||
|
/// Used to represent errors from downloading images. Errors from here are used solely for debugging
|
||||||
|
/// as they are considered recoverable.
|
||||||
|
pub struct ImgError {
|
||||||
|
kind: ErrorKind,
|
||||||
|
url: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ImgError {
|
||||||
|
pub fn with_kind(kind: ErrorKind) -> Self {
|
||||||
|
ImgError { url: None, kind }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_url(&mut self, url: &str) {
|
||||||
|
self.url = Some(url.to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn url(&self) -> &Option<String> {
|
||||||
|
&self.url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<ErrorKind> for ImgError {
|
||||||
|
fn from(kind: ErrorKind) -> Self {
|
||||||
|
ImgError::with_kind(kind)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<surf::Error> for ImgError {
|
||||||
|
fn from(err: surf::Error) -> Self {
|
||||||
|
ImgError::with_kind(ErrorKind::HTTPError(err.to_string()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<url::ParseError> for ImgError {
|
||||||
|
fn from(err: url::ParseError) -> Self {
|
||||||
|
ImgError::with_kind(ErrorKind::HTTPError(err.to_string()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<std::io::Error> for ImgError {
|
||||||
|
fn from(err: std::io::Error) -> Self {
|
||||||
|
ImgError::with_kind(ErrorKind::IOError(err.to_string()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Error, Debug)]
|
||||||
|
#[error("{kind}")]
|
||||||
|
pub struct PaperoniError {
|
||||||
|
article_source: Option<String>,
|
||||||
|
kind: ErrorKind,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PaperoniError {
|
||||||
|
pub fn with_kind(kind: ErrorKind) -> Self {
|
||||||
|
PaperoniError {
|
||||||
|
article_source: None,
|
||||||
|
kind,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn kind(&self) -> &ErrorKind {
|
||||||
|
&self.kind
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn article_source(&self) -> &Option<String> {
|
||||||
|
&self.article_source
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_article_source(&mut self, article_source: &str) {
|
||||||
|
self.article_source = Some(article_source.to_owned());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<ErrorKind> for PaperoniError {
|
||||||
|
fn from(kind: ErrorKind) -> Self {
|
||||||
|
PaperoniError::with_kind(kind)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<epub_builder::Error> for PaperoniError {
|
||||||
|
fn from(err: epub_builder::Error) -> Self {
|
||||||
|
PaperoniError::with_kind(ErrorKind::EpubError(err.description().to_owned()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<surf::Error> for PaperoniError {
|
||||||
|
fn from(err: surf::Error) -> Self {
|
||||||
|
PaperoniError::with_kind(ErrorKind::HTTPError(err.to_string()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<url::ParseError> for PaperoniError {
|
||||||
|
fn from(err: url::ParseError) -> Self {
|
||||||
|
PaperoniError::with_kind(ErrorKind::HTTPError(err.to_string()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<std::io::Error> for PaperoniError {
|
||||||
|
fn from(err: std::io::Error) -> Self {
|
||||||
|
PaperoniError::with_kind(ErrorKind::IOError(err.to_string()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<std::str::Utf8Error> for PaperoniError {
|
||||||
|
fn from(err: std::str::Utf8Error) -> Self {
|
||||||
|
PaperoniError::with_kind(ErrorKind::UTF8Error(err.to_string()))
|
||||||
|
}
|
||||||
|
}
|
|
@ -2,6 +2,7 @@ use std::collections::HashMap;
|
||||||
|
|
||||||
use kuchiki::{traits::*, NodeRef};
|
use kuchiki::{traits::*, NodeRef};
|
||||||
|
|
||||||
|
use crate::errors::PaperoniError;
|
||||||
use crate::moz_readability::{MetaData, Readability};
|
use crate::moz_readability::{MetaData, Readability};
|
||||||
|
|
||||||
pub type ResourceInfo = (String, Option<String>);
|
pub type ResourceInfo = (String, Option<String>);
|
||||||
|
@ -14,22 +15,24 @@ pub struct Extractor {
|
||||||
article: Option<NodeRef>,
|
article: Option<NodeRef>,
|
||||||
pub img_urls: Vec<ResourceInfo>,
|
pub img_urls: Vec<ResourceInfo>,
|
||||||
readability: Readability,
|
readability: Readability,
|
||||||
|
pub url: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Extractor {
|
impl Extractor {
|
||||||
/// Create a new instance of an HTML extractor given an HTML string
|
/// Create a new instance of an HTML extractor given an HTML string
|
||||||
pub fn from_html(html_str: &str) -> Self {
|
pub fn from_html(html_str: &str, url: &str) -> Self {
|
||||||
Extractor {
|
Extractor {
|
||||||
article: None,
|
article: None,
|
||||||
img_urls: Vec::new(),
|
img_urls: Vec::new(),
|
||||||
readability: Readability::new(html_str),
|
readability: Readability::new(html_str),
|
||||||
|
url: url.to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Locates and extracts the HTML in a document which is determined to be
|
/// Locates and extracts the HTML in a document which is determined to be
|
||||||
/// the source of the content
|
/// the source of the content
|
||||||
pub fn extract_content(&mut self, url: &str) {
|
pub fn extract_content(&mut self) -> Result<(), PaperoniError> {
|
||||||
self.readability.parse(url);
|
self.readability.parse(&self.url)?;
|
||||||
if let Some(article_node_ref) = &self.readability.article_node {
|
if let Some(article_node_ref) = &self.readability.article_node {
|
||||||
let template = r#"
|
let template = r#"
|
||||||
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
|
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
|
||||||
|
@ -44,6 +47,7 @@ impl Extractor {
|
||||||
body.as_node().append(article_node_ref.clone());
|
body.as_node().append(article_node_ref.clone());
|
||||||
self.article = Some(doc);
|
self.article = Some(doc);
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Traverses the DOM tree of the content and retrieves the IMG URLs
|
/// Traverses the DOM tree of the content and retrieves the IMG URLs
|
||||||
|
@ -61,8 +65,11 @@ impl Extractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn article(&self) -> Option<&NodeRef> {
|
/// Returns the extracted article [NodeRef]. It should only be called *AFTER* calling parse
|
||||||
self.article.as_ref()
|
pub fn article(&self) -> &NodeRef {
|
||||||
|
self.article.as_ref().expect(
|
||||||
|
"Article node doesn't exist. This may be because the document has not been parsed",
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn metadata(&self) -> &MetaData {
|
pub fn metadata(&self) -> &MetaData {
|
||||||
|
@ -75,7 +82,7 @@ impl Extractor {
|
||||||
pub fn serialize_to_xhtml<W: std::io::Write>(
|
pub fn serialize_to_xhtml<W: std::io::Write>(
|
||||||
node_ref: &NodeRef,
|
node_ref: &NodeRef,
|
||||||
mut w: &mut W,
|
mut w: &mut W,
|
||||||
) -> Result<(), Box<dyn std::error::Error>> {
|
) -> Result<(), PaperoniError> {
|
||||||
let mut escape_map = HashMap::new();
|
let mut escape_map = HashMap::new();
|
||||||
escape_map.insert("<", "<");
|
escape_map.insert("<", "<");
|
||||||
escape_map.insert(">", ">");
|
escape_map.insert(">", ">");
|
||||||
|
@ -96,6 +103,7 @@ pub fn serialize_to_xhtml<W: std::io::Write>(
|
||||||
let attrs_str = attrs
|
let attrs_str = attrs
|
||||||
.map
|
.map
|
||||||
.iter()
|
.iter()
|
||||||
|
.filter(|(k, _)| &k.local != "\"")
|
||||||
.map(|(k, v)| {
|
.map(|(k, v)| {
|
||||||
format!(
|
format!(
|
||||||
"{}=\"{}\"",
|
"{}=\"{}\"",
|
||||||
|
@ -156,8 +164,10 @@ mod test {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_extract_img_urls() {
|
fn test_extract_img_urls() {
|
||||||
let mut extractor = Extractor::from_html(TEST_HTML);
|
let mut extractor = Extractor::from_html(TEST_HTML, "http://example.com/");
|
||||||
extractor.extract_content("http://example.com/");
|
extractor
|
||||||
|
.extract_content()
|
||||||
|
.expect("Article extraction failed unexpectedly");
|
||||||
extractor.extract_img_urls();
|
extractor.extract_img_urls();
|
||||||
|
|
||||||
assert!(extractor.img_urls.len() > 0);
|
assert!(extractor.img_urls.len() > 0);
|
||||||
|
|
139
src/http.rs
139
src/http.rs
|
@ -1,18 +1,19 @@
|
||||||
use async_std::io::prelude::*;
|
use async_std::io::prelude::*;
|
||||||
use async_std::{fs::File, stream};
|
use async_std::{fs::File, stream};
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
|
use indicatif::ProgressBar;
|
||||||
|
use log::{debug, info};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
|
use crate::errors::{ErrorKind, ImgError, PaperoniError};
|
||||||
use crate::extractor::Extractor;
|
use crate::extractor::Extractor;
|
||||||
|
|
||||||
type HTMLResource = (String, String);
|
type HTMLResource = (String, String);
|
||||||
|
|
||||||
pub async fn fetch_url(
|
pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
|
||||||
url: &str,
|
|
||||||
) -> Result<HTMLResource, Box<dyn std::error::Error + Send + Sync>> {
|
|
||||||
let client = surf::Client::new();
|
let client = surf::Client::new();
|
||||||
println!("Fetching...");
|
debug!("Fetching {}", url);
|
||||||
|
|
||||||
|
let process_request = async {
|
||||||
let mut redirect_count: u8 = 0;
|
let mut redirect_count: u8 = 0;
|
||||||
let base_url = Url::parse(&url)?;
|
let base_url = Url::parse(&url)?;
|
||||||
let mut url = base_url.clone();
|
let mut url = base_url.clone();
|
||||||
|
@ -23,10 +24,19 @@ pub async fn fetch_url(
|
||||||
if res.status().is_redirection() {
|
if res.status().is_redirection() {
|
||||||
if let Some(location) = res.header(surf::http::headers::LOCATION) {
|
if let Some(location) = res.header(surf::http::headers::LOCATION) {
|
||||||
match Url::parse(location.last().as_str()) {
|
match Url::parse(location.last().as_str()) {
|
||||||
Ok(valid_url) => url = valid_url,
|
Ok(valid_url) => {
|
||||||
|
info!("Redirecting {} to {}", url, valid_url);
|
||||||
|
url = valid_url
|
||||||
|
}
|
||||||
Err(e) => match e {
|
Err(e) => match e {
|
||||||
url::ParseError::RelativeUrlWithoutBase => {
|
url::ParseError::RelativeUrlWithoutBase => {
|
||||||
url = base_url.join(location.last().as_str())?
|
match base_url.join(location.last().as_str()) {
|
||||||
|
Ok(joined_url) => {
|
||||||
|
info!("Redirecting {} to {}", url, joined_url);
|
||||||
|
url = joined_url;
|
||||||
|
}
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
e => return Err(e.into()),
|
e => return Err(e.into()),
|
||||||
},
|
},
|
||||||
|
@ -35,31 +45,46 @@ pub async fn fetch_url(
|
||||||
} else if res.status().is_success() {
|
} else if res.status().is_success() {
|
||||||
if let Some(mime) = res.content_type() {
|
if let Some(mime) = res.content_type() {
|
||||||
if mime.essence() == "text/html" {
|
if mime.essence() == "text/html" {
|
||||||
|
debug!("Successfully fetched {}", url);
|
||||||
return Ok((url.to_string(), res.body_string().await?));
|
return Ok((url.to_string(), res.body_string().await?));
|
||||||
} else {
|
} else {
|
||||||
return Err(format!(
|
let msg = format!(
|
||||||
"Invalid HTTP response. Received {} instead of text/html",
|
"Invalid HTTP response. Received {} instead of text/html",
|
||||||
mime.essence()
|
mime.essence()
|
||||||
)
|
);
|
||||||
.into());
|
|
||||||
|
return Err(ErrorKind::HTTPError(msg).into());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return Err("Unknown HTTP response".into());
|
return Err(ErrorKind::HTTPError("Unknown HTTP response".to_owned()).into());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return Err(format!("Request failed: HTTP {}", res.status()).into());
|
let msg = format!("Request failed: HTTP {}", res.status());
|
||||||
|
return Err(ErrorKind::HTTPError(msg).into());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err("Unable to fetch HTML".into())
|
Err(ErrorKind::HTTPError("Unable to fetch HTML".to_owned()).into())
|
||||||
|
};
|
||||||
|
|
||||||
|
process_request.await.map_err(|mut error: PaperoniError| {
|
||||||
|
error.set_article_source(url);
|
||||||
|
error
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn download_images(
|
pub async fn download_images(
|
||||||
extractor: &mut Extractor,
|
extractor: &mut Extractor,
|
||||||
article_origin: &Url,
|
article_origin: &Url,
|
||||||
) -> async_std::io::Result<()> {
|
bar: &ProgressBar,
|
||||||
|
) -> Result<(), Vec<ImgError>> {
|
||||||
if extractor.img_urls.len() > 0 {
|
if extractor.img_urls.len() > 0 {
|
||||||
println!("Downloading images...");
|
debug!(
|
||||||
|
"Downloading {} images for {}",
|
||||||
|
extractor.img_urls.len(),
|
||||||
|
article_origin
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
let img_count = extractor.img_urls.len();
|
||||||
|
|
||||||
let imgs_req_iter = extractor
|
let imgs_req_iter = extractor
|
||||||
.img_urls
|
.img_urls
|
||||||
|
@ -67,31 +92,49 @@ pub async fn download_images(
|
||||||
.map(|(url, _)| {
|
.map(|(url, _)| {
|
||||||
(
|
(
|
||||||
url,
|
url,
|
||||||
surf::Client::new().get(get_absolute_url(&url, article_origin)),
|
surf::Client::new()
|
||||||
|
.with(surf::middleware::Redirect::default())
|
||||||
|
.get(get_absolute_url(&url, article_origin)),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.map(|(url, req)| async move {
|
.enumerate()
|
||||||
let mut img_response = req.await.expect("Unable to retrieve image");
|
.map(|(img_idx, (url, req))| async move {
|
||||||
let img_content: Vec<u8> = img_response.body_bytes().await.unwrap();
|
bar.set_message(format!("Downloading images [{}/{}]", img_idx + 1, img_count).as_str());
|
||||||
|
match req.await {
|
||||||
|
Ok(mut img_response) => {
|
||||||
|
let process_response = async {
|
||||||
|
let img_content: Vec<u8> = match img_response.body_bytes().await {
|
||||||
|
Ok(bytes) => bytes,
|
||||||
|
Err(e) => return Err(e.into()),
|
||||||
|
};
|
||||||
let img_mime = img_response
|
let img_mime = img_response
|
||||||
.content_type()
|
.content_type()
|
||||||
.map(|mime| mime.essence().to_string());
|
.map(|mime| mime.essence().to_string());
|
||||||
let img_ext = img_response
|
let img_ext = match img_response
|
||||||
.content_type()
|
.content_type()
|
||||||
.map(|mime| map_mime_subtype_to_ext(mime.subtype()).to_string())
|
.map(|mime| map_mime_subtype_to_ext(mime.subtype()).to_string())
|
||||||
.unwrap();
|
{
|
||||||
|
Some(mime_str) => mime_str,
|
||||||
|
None => {
|
||||||
|
return Err(ErrorKind::HTTPError(
|
||||||
|
"Image has no Content-Type".to_owned(),
|
||||||
|
)
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
let mut img_path = std::env::temp_dir();
|
let mut img_path = std::env::temp_dir();
|
||||||
img_path.push(format!("{}.{}", hash_url(&url), &img_ext));
|
img_path.push(format!("{}.{}", hash_url(&url), &img_ext));
|
||||||
let mut img_file = File::create(&img_path)
|
let mut img_file = match File::create(&img_path).await {
|
||||||
.await
|
Ok(file) => file,
|
||||||
.expect("Unable to create file");
|
Err(e) => return Err(e.into()),
|
||||||
img_file
|
};
|
||||||
.write_all(&img_content)
|
match img_file.write_all(&img_content).await {
|
||||||
.await
|
Ok(_) => (),
|
||||||
.expect("Unable to save to file");
|
Err(e) => return Err(e.into()),
|
||||||
|
}
|
||||||
|
|
||||||
(
|
Ok((
|
||||||
url,
|
url,
|
||||||
img_path
|
img_path
|
||||||
.file_name()
|
.file_name()
|
||||||
|
@ -103,7 +146,19 @@ pub async fn download_images(
|
||||||
})
|
})
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
img_mime,
|
img_mime,
|
||||||
)
|
))
|
||||||
|
};
|
||||||
|
process_response.await.map_err(|mut e: ImgError| {
|
||||||
|
e.set_url(url);
|
||||||
|
e
|
||||||
|
})
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
let mut img_err: ImgError = e.into();
|
||||||
|
img_err.set_url(url);
|
||||||
|
Err(img_err)
|
||||||
|
}
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// A utility closure used when update the value of an image source after downloading is successful
|
// A utility closure used when update the value of an image source after downloading is successful
|
||||||
|
@ -112,8 +167,6 @@ pub async fn download_images(
|
||||||
let (img_url, img_path, img_mime) = img_item;
|
let (img_url, img_path, img_mime) = img_item;
|
||||||
let img_ref = extractor
|
let img_ref = extractor
|
||||||
.article()
|
.article()
|
||||||
.as_mut()
|
|
||||||
.expect("Unable to get mutable ref")
|
|
||||||
.select_first(&format!("img[src='{}']", img_url))
|
.select_first(&format!("img[src='{}']", img_url))
|
||||||
.expect("Image node does not exist");
|
.expect("Image node does not exist");
|
||||||
let mut img_node = img_ref.attributes.borrow_mut();
|
let mut img_node = img_ref.attributes.borrow_mut();
|
||||||
|
@ -124,14 +177,24 @@ pub async fn download_images(
|
||||||
(img_path, img_mime)
|
(img_path, img_mime)
|
||||||
};
|
};
|
||||||
|
|
||||||
extractor.img_urls = stream::from_iter(imgs_req_iter)
|
let imgs_req_iter = stream::from_iter(imgs_req_iter)
|
||||||
.buffered(10)
|
.buffered(10)
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<Result<_, ImgError>>>()
|
||||||
.await
|
.await;
|
||||||
.into_iter()
|
let mut errors = Vec::new();
|
||||||
.map(replace_existing_img_src)
|
let mut replaced_imgs = Vec::new();
|
||||||
.collect();
|
for img_req_result in imgs_req_iter {
|
||||||
|
match img_req_result {
|
||||||
|
Ok(img_req) => replaced_imgs.push(replace_existing_img_src(img_req)),
|
||||||
|
Err(e) => errors.push(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
extractor.img_urls = replaced_imgs;
|
||||||
|
if errors.is_empty() {
|
||||||
Ok(())
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(errors)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handles getting the extension from a given MIME subtype.
|
/// Handles getting the extension from a given MIME subtype.
|
||||||
|
|
260
src/logs.rs
Normal file
260
src/logs.rs
Normal file
|
@ -0,0 +1,260 @@
|
||||||
|
use colored::*;
|
||||||
|
use comfy_table::presets::UTF8_HORIZONTAL_BORDERS_ONLY;
|
||||||
|
use comfy_table::{Cell, CellAlignment, ContentArrangement, Table};
|
||||||
|
use directories::UserDirs;
|
||||||
|
use flexi_logger::LogSpecBuilder;
|
||||||
|
use log::error;
|
||||||
|
|
||||||
|
use crate::{cli::AppConfig, errors::PaperoniError};
|
||||||
|
|
||||||
|
pub fn display_summary(
|
||||||
|
initial_article_count: usize,
|
||||||
|
succesful_articles_table: Table,
|
||||||
|
partial_downloads_count: usize,
|
||||||
|
errors: Vec<PaperoniError>,
|
||||||
|
) {
|
||||||
|
let successfully_downloaded_count =
|
||||||
|
initial_article_count - partial_downloads_count - errors.len();
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"{}",
|
||||||
|
short_summary(DownloadCount::new(
|
||||||
|
initial_article_count,
|
||||||
|
successfully_downloaded_count,
|
||||||
|
partial_downloads_count,
|
||||||
|
errors.len()
|
||||||
|
))
|
||||||
|
.bold()
|
||||||
|
);
|
||||||
|
|
||||||
|
if successfully_downloaded_count > 0 {
|
||||||
|
println!("{}", succesful_articles_table);
|
||||||
|
}
|
||||||
|
if !errors.is_empty() {
|
||||||
|
println!("\n{}", "Failed article downloads".bright_red().bold());
|
||||||
|
let mut table_failed = Table::new();
|
||||||
|
table_failed
|
||||||
|
.load_preset(UTF8_HORIZONTAL_BORDERS_ONLY)
|
||||||
|
.set_header(vec![
|
||||||
|
Cell::new("Link").set_alignment(CellAlignment::Center),
|
||||||
|
Cell::new("Reason").set_alignment(CellAlignment::Center),
|
||||||
|
])
|
||||||
|
.set_content_arrangement(ContentArrangement::Dynamic);
|
||||||
|
|
||||||
|
for error in errors {
|
||||||
|
let error_source = error
|
||||||
|
.article_source()
|
||||||
|
.clone()
|
||||||
|
.unwrap_or_else(|| "<unknown link>".to_string());
|
||||||
|
table_failed.add_row(vec![&error_source, &format!("{}", error.kind())]);
|
||||||
|
error!("{}\n - {}", error, error_source);
|
||||||
|
}
|
||||||
|
println!("{}", table_failed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a string summary of the total number of failed and successful article downloads
|
||||||
|
fn short_summary(download_count: DownloadCount) -> String {
|
||||||
|
// TODO: Refactor this
|
||||||
|
if download_count.total
|
||||||
|
!= download_count.successful + download_count.failed + download_count.partial
|
||||||
|
{
|
||||||
|
panic!("initial_count must be equal to the sum of failed and successful count")
|
||||||
|
}
|
||||||
|
let get_noun = |count: usize| if count == 1 { "article" } else { "articles" };
|
||||||
|
if download_count.successful == download_count.total && download_count.successful == 1 {
|
||||||
|
"Article downloaded successfully".green().to_string()
|
||||||
|
} else if download_count.total == download_count.failed && download_count.failed == 1 {
|
||||||
|
"Article failed to download".red().to_string()
|
||||||
|
} else if download_count.total == download_count.partial && download_count.partial == 1 {
|
||||||
|
"Article partially failed to download".yellow().to_string()
|
||||||
|
} else if download_count.successful == download_count.total {
|
||||||
|
"All articles downloaded successfully".green().to_string()
|
||||||
|
} else if download_count.failed == download_count.total {
|
||||||
|
"All articles failed to download".red().to_string()
|
||||||
|
} else if download_count.partial == download_count.total {
|
||||||
|
"All articles partially failed to download"
|
||||||
|
.yellow()
|
||||||
|
.to_string()
|
||||||
|
} else if download_count.partial == 0 {
|
||||||
|
format!(
|
||||||
|
"{} {} downloaded successfully, {} {} failed",
|
||||||
|
download_count.successful,
|
||||||
|
get_noun(download_count.successful),
|
||||||
|
download_count.failed,
|
||||||
|
get_noun(download_count.failed)
|
||||||
|
)
|
||||||
|
.yellow()
|
||||||
|
.to_string()
|
||||||
|
} else if download_count.successful == 0
|
||||||
|
&& download_count.partial > 0
|
||||||
|
&& download_count.failed > 0
|
||||||
|
{
|
||||||
|
format!(
|
||||||
|
"{} {} partially failed to download, {} {} failed",
|
||||||
|
download_count.partial,
|
||||||
|
get_noun(download_count.partial),
|
||||||
|
download_count.failed,
|
||||||
|
get_noun(download_count.failed)
|
||||||
|
)
|
||||||
|
.yellow()
|
||||||
|
.to_string()
|
||||||
|
} else if download_count.failed == 0
|
||||||
|
&& download_count.successful > 0
|
||||||
|
&& download_count.partial > 0
|
||||||
|
{
|
||||||
|
format!(
|
||||||
|
"{} {} downloaded successfully, {} {} partially failed to download",
|
||||||
|
download_count.successful,
|
||||||
|
get_noun(download_count.successful),
|
||||||
|
download_count.partial,
|
||||||
|
get_noun(download_count.partial)
|
||||||
|
)
|
||||||
|
.yellow()
|
||||||
|
.to_string()
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
|
"{} {} downloaded successfully, {} {} partially failed to download, {} {} failed",
|
||||||
|
download_count.successful,
|
||||||
|
get_noun(download_count.successful),
|
||||||
|
download_count.partial,
|
||||||
|
get_noun(download_count.partial),
|
||||||
|
download_count.failed,
|
||||||
|
get_noun(download_count.failed)
|
||||||
|
)
|
||||||
|
.yellow()
|
||||||
|
.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct DownloadCount {
|
||||||
|
total: usize,
|
||||||
|
successful: usize,
|
||||||
|
partial: usize,
|
||||||
|
failed: usize,
|
||||||
|
}
|
||||||
|
impl DownloadCount {
|
||||||
|
fn new(total: usize, successful: usize, partial: usize, failed: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
total,
|
||||||
|
successful,
|
||||||
|
partial,
|
||||||
|
failed,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn init_logger(app_config: &AppConfig) {
|
||||||
|
match UserDirs::new() {
|
||||||
|
Some(user_dirs) => {
|
||||||
|
let home_dir = user_dirs.home_dir();
|
||||||
|
let paperoni_dir = home_dir.join(".paperoni");
|
||||||
|
let log_dir = paperoni_dir.join("logs");
|
||||||
|
|
||||||
|
let log_spec = LogSpecBuilder::new()
|
||||||
|
.module("paperoni", app_config.log_level())
|
||||||
|
.build();
|
||||||
|
let formatted_timestamp = app_config.start_time().format("%Y-%m-%d_%H-%M-%S");
|
||||||
|
let mut logger = flexi_logger::Logger::with(log_spec);
|
||||||
|
|
||||||
|
if app_config.is_logging_to_file() && (!paperoni_dir.is_dir() || !log_dir.is_dir()) {
|
||||||
|
match std::fs::create_dir_all(&log_dir) {
|
||||||
|
Ok(_) => (),
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("Unable to create paperoni directories on home directory for logging purposes\n{}",e);
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if app_config.is_logging_to_file() {
|
||||||
|
logger = logger
|
||||||
|
.directory(log_dir)
|
||||||
|
.discriminant(formatted_timestamp.to_string())
|
||||||
|
.suppress_timestamp()
|
||||||
|
.log_to_file();
|
||||||
|
}
|
||||||
|
|
||||||
|
match logger.start() {
|
||||||
|
Ok(_) => (),
|
||||||
|
Err(e) => eprintln!("Unable to start logger!\n{}", e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => eprintln!("Unable to get user directories for logging purposes"),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::{short_summary, DownloadCount};
|
||||||
|
use colored::*;
|
||||||
|
#[test]
|
||||||
|
fn test_short_summary() {
|
||||||
|
assert_eq!(
|
||||||
|
short_summary(DownloadCount::new(1, 1, 0, 0)),
|
||||||
|
"Article downloaded successfully".green().to_string()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
short_summary(DownloadCount::new(1, 0, 0, 1)),
|
||||||
|
"Article failed to download".red().to_string()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
short_summary(DownloadCount::new(10, 10, 0, 0)),
|
||||||
|
"All articles downloaded successfully".green().to_string()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
short_summary(DownloadCount::new(10, 0, 0, 10)),
|
||||||
|
"All articles failed to download".red().to_string()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
short_summary(DownloadCount::new(10, 8, 0, 2)),
|
||||||
|
"8 articles downloaded successfully, 2 articles failed"
|
||||||
|
.yellow()
|
||||||
|
.to_string()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
short_summary(DownloadCount::new(10, 1, 0, 9)),
|
||||||
|
"1 article downloaded successfully, 9 articles failed"
|
||||||
|
.yellow()
|
||||||
|
.to_string()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
short_summary(DownloadCount::new(7, 6, 0, 1)),
|
||||||
|
"6 articles downloaded successfully, 1 article failed"
|
||||||
|
.yellow()
|
||||||
|
.to_string()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
short_summary(DownloadCount::new(7, 4, 2, 1)),
|
||||||
|
"4 articles downloaded successfully, 2 articles partially failed to download, 1 article failed"
|
||||||
|
.yellow()
|
||||||
|
.to_string()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
short_summary(DownloadCount::new(12, 6, 6, 0)),
|
||||||
|
"6 articles downloaded successfully, 6 articles partially failed to download"
|
||||||
|
.yellow()
|
||||||
|
.to_string()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
short_summary(DownloadCount::new(5, 0, 4, 1)),
|
||||||
|
"4 articles partially failed to download, 1 article failed"
|
||||||
|
.yellow()
|
||||||
|
.to_string()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
short_summary(DownloadCount::new(4, 0, 4, 0)),
|
||||||
|
"All articles partially failed to download"
|
||||||
|
.yellow()
|
||||||
|
.to_string()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[should_panic(
|
||||||
|
expected = "initial_count must be equal to the sum of failed and successful count"
|
||||||
|
)]
|
||||||
|
fn test_short_summary_panics_on_invalid_input() {
|
||||||
|
short_summary(DownloadCount::new(0, 12, 0, 43));
|
||||||
|
}
|
||||||
|
}
|
92
src/main.rs
92
src/main.rs
|
@ -3,21 +3,28 @@ extern crate lazy_static;
|
||||||
|
|
||||||
use async_std::stream;
|
use async_std::stream;
|
||||||
use async_std::task;
|
use async_std::task;
|
||||||
|
use comfy_table::presets::{UTF8_FULL, UTF8_HORIZONTAL_BORDERS_ONLY};
|
||||||
|
use comfy_table::{ContentArrangement, Table};
|
||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
|
use indicatif::{ProgressBar, ProgressStyle};
|
||||||
|
use log::{debug, warn};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
mod cli;
|
mod cli;
|
||||||
mod epub;
|
mod epub;
|
||||||
|
mod errors;
|
||||||
mod extractor;
|
mod extractor;
|
||||||
/// This module is responsible for async HTTP calls for downloading
|
/// This module is responsible for async HTTP calls for downloading
|
||||||
/// the HTML content and images
|
/// the HTML content and images
|
||||||
mod http;
|
mod http;
|
||||||
|
mod logs;
|
||||||
mod moz_readability;
|
mod moz_readability;
|
||||||
|
|
||||||
use cli::AppConfig;
|
use cli::AppConfig;
|
||||||
use epub::generate_epubs;
|
use epub::generate_epubs;
|
||||||
use extractor::Extractor;
|
use extractor::Extractor;
|
||||||
use http::{download_images, fetch_url};
|
use http::{download_images, fetch_html};
|
||||||
|
use logs::display_summary;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let app_config = cli::cli_init();
|
let app_config = cli::cli_init();
|
||||||
|
@ -28,29 +35,92 @@ fn main() {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn download(app_config: AppConfig) {
|
fn download(app_config: AppConfig) {
|
||||||
|
let mut errors = Vec::new();
|
||||||
|
let mut partial_download_count: usize = 0;
|
||||||
|
let bar = if app_config.can_disable_progress_bar() {
|
||||||
|
ProgressBar::hidden()
|
||||||
|
} else {
|
||||||
|
let enabled_bar = ProgressBar::new(app_config.urls().len() as u64);
|
||||||
|
let style = ProgressStyle::default_bar().template(
|
||||||
|
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} link {pos}/{len:7} {msg:.yellow/white}",
|
||||||
|
);
|
||||||
|
enabled_bar.set_style(style);
|
||||||
|
enabled_bar.enable_steady_tick(500);
|
||||||
|
enabled_bar
|
||||||
|
};
|
||||||
let articles = task::block_on(async {
|
let articles = task::block_on(async {
|
||||||
let urls_iter = app_config.urls().iter().map(|url| fetch_url(url));
|
let urls_iter = app_config.urls().iter().map(|url| fetch_html(url));
|
||||||
let mut responses = stream::from_iter(urls_iter).buffered(app_config.max_conn());
|
let mut responses = stream::from_iter(urls_iter).buffered(app_config.max_conn());
|
||||||
let mut articles = Vec::new();
|
let mut articles = Vec::new();
|
||||||
while let Some(fetch_result) = responses.next().await {
|
while let Some(fetch_result) = responses.next().await {
|
||||||
match fetch_result {
|
match fetch_result {
|
||||||
Ok((url, html)) => {
|
Ok((url, html)) => {
|
||||||
println!("Extracting");
|
debug!("Extracting {}", &url);
|
||||||
let mut extractor = Extractor::from_html(&html);
|
let mut extractor = Extractor::from_html(&html, &url);
|
||||||
extractor.extract_content(&url);
|
bar.set_message("Extracting...");
|
||||||
|
match extractor.extract_content() {
|
||||||
if extractor.article().is_some() {
|
Ok(_) => {
|
||||||
extractor.extract_img_urls();
|
extractor.extract_img_urls();
|
||||||
download_images(&mut extractor, &Url::parse(&url).unwrap())
|
if let Err(img_errors) =
|
||||||
|
download_images(&mut extractor, &Url::parse(&url).unwrap(), &bar)
|
||||||
.await
|
.await
|
||||||
.expect("Unable to download images");
|
{
|
||||||
|
partial_download_count += 1;
|
||||||
|
warn!(
|
||||||
|
"{} image{} failed to download for {}",
|
||||||
|
img_errors.len(),
|
||||||
|
if img_errors.len() > 1 { "s" } else { "" },
|
||||||
|
url
|
||||||
|
);
|
||||||
|
for img_error in img_errors {
|
||||||
|
warn!(
|
||||||
|
"{}\n\t\tReason {}",
|
||||||
|
img_error.url().as_ref().unwrap(),
|
||||||
|
img_error
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
articles.push(extractor);
|
articles.push(extractor);
|
||||||
}
|
}
|
||||||
|
Err(mut e) => {
|
||||||
|
e.set_article_source(&url);
|
||||||
|
errors.push(e);
|
||||||
}
|
}
|
||||||
Err(e) => eprintln!("{}", e),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Err(e) => errors.push(e),
|
||||||
|
}
|
||||||
|
bar.inc(1);
|
||||||
|
}
|
||||||
articles
|
articles
|
||||||
});
|
});
|
||||||
generate_epubs(articles, app_config.merged());
|
bar.finish_with_message("Downloaded articles");
|
||||||
|
|
||||||
|
let mut succesful_articles_table = Table::new();
|
||||||
|
succesful_articles_table
|
||||||
|
.load_preset(UTF8_FULL)
|
||||||
|
.load_preset(UTF8_HORIZONTAL_BORDERS_ONLY)
|
||||||
|
.set_content_arrangement(ContentArrangement::Dynamic);
|
||||||
|
match generate_epubs(articles, &app_config, &mut succesful_articles_table) {
|
||||||
|
Ok(_) => (),
|
||||||
|
Err(gen_epub_errors) => {
|
||||||
|
errors.extend(gen_epub_errors);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let has_errors = !errors.is_empty();
|
||||||
|
display_summary(
|
||||||
|
app_config.urls().len(),
|
||||||
|
succesful_articles_table,
|
||||||
|
partial_download_count,
|
||||||
|
errors,
|
||||||
|
);
|
||||||
|
if app_config.is_logging_to_file() {
|
||||||
|
println!(
|
||||||
|
"Log written to paperoni_{}.log\n",
|
||||||
|
app_config.start_time().format("%Y-%m-%d_%H-%M-%S")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if has_errors {
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,8 +7,11 @@ use kuchiki::{
|
||||||
traits::*,
|
traits::*,
|
||||||
NodeData, NodeRef,
|
NodeData, NodeRef,
|
||||||
};
|
};
|
||||||
|
use log::info;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
|
use crate::errors::{ErrorKind, PaperoniError};
|
||||||
|
|
||||||
const DEFAULT_CHAR_THRESHOLD: usize = 500;
|
const DEFAULT_CHAR_THRESHOLD: usize = 500;
|
||||||
const FLAG_STRIP_UNLIKELYS: u32 = 0x1;
|
const FLAG_STRIP_UNLIKELYS: u32 = 0x1;
|
||||||
const FLAG_WEIGHT_CLASSES: u32 = 0x2;
|
const FLAG_WEIGHT_CLASSES: u32 = 0x2;
|
||||||
|
@ -76,14 +79,15 @@ impl Readability {
|
||||||
metadata: MetaData::new(),
|
metadata: MetaData::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn parse(&mut self, url: &str) {
|
pub fn parse(&mut self, url: &str) -> Result<(), PaperoniError> {
|
||||||
self.unwrap_no_script_tags();
|
self.unwrap_no_script_tags();
|
||||||
self.remove_scripts();
|
self.remove_scripts();
|
||||||
self.prep_document();
|
self.prep_document();
|
||||||
self.metadata = self.get_article_metadata();
|
self.metadata = self.get_article_metadata();
|
||||||
self.article_title = self.metadata.title.clone();
|
self.article_title = self.metadata.title.clone();
|
||||||
self.grab_article();
|
self.grab_article()?;
|
||||||
self.post_process_content(url);
|
self.post_process_content(url);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Recursively check if node is image, or if node contains exactly only one image
|
/// Recursively check if node is image, or if node contains exactly only one image
|
||||||
|
@ -426,8 +430,7 @@ impl Readability {
|
||||||
let mut matches = None;
|
let mut matches = None;
|
||||||
if let Some(property) = node_attr.get("property") {
|
if let Some(property) = node_attr.get("property") {
|
||||||
matches = regexes::PROPERTY_REGEX.captures(property);
|
matches = regexes::PROPERTY_REGEX.captures(property);
|
||||||
if matches.is_some() {
|
if let Some(captures) = &matches {
|
||||||
let captures = matches.as_ref().unwrap();
|
|
||||||
for capture in captures.iter() {
|
for capture in captures.iter() {
|
||||||
let mut name = capture.unwrap().as_str().to_lowercase();
|
let mut name = capture.unwrap().as_str().to_lowercase();
|
||||||
name = regexes::REPLACE_WHITESPACE_REGEX
|
name = regexes::REPLACE_WHITESPACE_REGEX
|
||||||
|
@ -561,7 +564,7 @@ impl Readability {
|
||||||
.root_node
|
.root_node
|
||||||
.select_first("title")
|
.select_first("title")
|
||||||
.map(|title| title.text_contents().trim().to_string())
|
.map(|title| title.text_contents().trim().to_string())
|
||||||
.expect("This file has no <title> tag to extract a title from");
|
.unwrap_or("".to_string());
|
||||||
let orig_title = cur_title.clone();
|
let orig_title = cur_title.clone();
|
||||||
let mut title_had_hierarchical_separators = false;
|
let mut title_had_hierarchical_separators = false;
|
||||||
let word_count = |s: &str| -> usize { s.split_whitespace().count() };
|
let word_count = |s: &str| -> usize { s.split_whitespace().count() };
|
||||||
|
@ -595,8 +598,8 @@ impl Readability {
|
||||||
}
|
}
|
||||||
} else if cur_title.len() > 150 || cur_title.len() < 15 {
|
} else if cur_title.len() > 150 || cur_title.len() < 15 {
|
||||||
let mut h1_nodes = self.root_node.select("h1").unwrap();
|
let mut h1_nodes = self.root_node.select("h1").unwrap();
|
||||||
let (_, h1_count) = h1_nodes.size_hint();
|
let h1_count = self.root_node.select("h1").unwrap().count();
|
||||||
if Some(1) == h1_count {
|
if h1_count == 1 {
|
||||||
cur_title = Self::get_inner_text(h1_nodes.next().unwrap().as_node(), None);
|
cur_title = Self::get_inner_text(h1_nodes.next().unwrap().as_node(), None);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -799,6 +802,7 @@ impl Readability {
|
||||||
state = State::ReadProp;
|
state = State::ReadProp;
|
||||||
decl.1 = Some(token.trim().to_string());
|
decl.1 = Some(token.trim().to_string());
|
||||||
tokens.push(decl.clone());
|
tokens.push(decl.clone());
|
||||||
|
decl = (None, None);
|
||||||
token.clear();
|
token.clear();
|
||||||
} else {
|
} else {
|
||||||
token.push(c);
|
token.push(c);
|
||||||
|
@ -819,11 +823,18 @@ impl Readability {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !token.is_empty() {
|
if !token.is_empty() {
|
||||||
|
match state {
|
||||||
|
State::ReadVal => {
|
||||||
decl.1 = Some(token.trim().to_string());
|
decl.1 = Some(token.trim().to_string());
|
||||||
tokens.push(decl);
|
tokens.push(decl);
|
||||||
}
|
}
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
tokens
|
tokens
|
||||||
.into_iter()
|
.into_iter()
|
||||||
|
.filter(|tok_pair| tok_pair.0.is_some() && tok_pair.1.is_some())
|
||||||
.map(|tok_pair| (tok_pair.0.unwrap(), tok_pair.1.unwrap()))
|
.map(|tok_pair| (tok_pair.0.unwrap(), tok_pair.1.unwrap()))
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
@ -1576,16 +1587,14 @@ impl Readability {
|
||||||
|
|
||||||
/// Using a variety of metrics (content score, classname, element types), find the content that is most likely to be the stuff
|
/// Using a variety of metrics (content score, classname, element types), find the content that is most likely to be the stuff
|
||||||
/// a user wants to read. Then return it wrapped up in a div.
|
/// a user wants to read. Then return it wrapped up in a div.
|
||||||
fn grab_article(&mut self) {
|
fn grab_article(&mut self) -> Result<(), PaperoniError> {
|
||||||
println!("Grabbing article");
|
info!("Grabbing article {:?}", self.metadata.title);
|
||||||
// var doc = this._doc;
|
// var doc = this._doc;
|
||||||
// var isPaging = (page !== null ? true: false);
|
// var isPaging = (page !== null ? true: false);
|
||||||
// page = page ? page : this._doc.body;
|
// page = page ? page : this._doc.body;
|
||||||
let page = self.root_node.select_first("body");
|
let page = self.root_node.select_first("body");
|
||||||
if page.is_err() {
|
if page.is_err() {
|
||||||
// TODO:Have error logging for this
|
return Err(ErrorKind::ReadabilityError("Document has no <body>".into()).into());
|
||||||
println!("Document has no <body>");
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
let page = page.unwrap();
|
let page = page.unwrap();
|
||||||
let mut attempts: Vec<ExtractAttempt> = Vec::new();
|
let mut attempts: Vec<ExtractAttempt> = Vec::new();
|
||||||
|
@ -2075,8 +2084,10 @@ impl Readability {
|
||||||
attempts.push(ExtractAttempt::new(article_content.clone(), text_length));
|
attempts.push(ExtractAttempt::new(article_content.clone(), text_length));
|
||||||
attempts.sort_by(|a, b| b.length.partial_cmp(&a.length).unwrap());
|
attempts.sort_by(|a, b| b.length.partial_cmp(&a.length).unwrap());
|
||||||
if attempts.first().as_ref().unwrap().length == 0 {
|
if attempts.first().as_ref().unwrap().length == 0 {
|
||||||
println!("Unable to extract content");
|
return Err(ErrorKind::ReadabilityError(
|
||||||
break;
|
"Unable to extract content".into(),
|
||||||
|
)
|
||||||
|
.into());
|
||||||
}
|
}
|
||||||
article_content = attempts[0].article.clone();
|
article_content = attempts[0].article.clone();
|
||||||
parse_successful = true;
|
parse_successful = true;
|
||||||
|
@ -2102,7 +2113,8 @@ impl Readability {
|
||||||
false
|
false
|
||||||
});
|
});
|
||||||
self.article_node = Some(article_content);
|
self.article_node = Some(article_content);
|
||||||
return;
|
info!("Successfully grabbed article {:?}", self.metadata.title);
|
||||||
|
return Ok(());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2460,12 +2472,24 @@ mod test {
|
||||||
css_map.insert("align-items".to_string(), "center".to_string());
|
css_map.insert("align-items".to_string(), "center".to_string());
|
||||||
css_map.insert("border".to_string(), "2px solid black".to_string());
|
css_map.insert("border".to_string(), "2px solid black".to_string());
|
||||||
|
|
||||||
let css_str_to_vec = Readability::inline_css_str_to_map(css_str);
|
let css_str_to_map = Readability::inline_css_str_to_map(css_str);
|
||||||
assert_eq!(css_map, css_str_to_vec);
|
assert_eq!(css_map, css_str_to_map);
|
||||||
let mut css_map = HashMap::new();
|
let mut css_map = HashMap::new();
|
||||||
css_map.insert("color".to_string(), "red".to_string());
|
css_map.insert("color".to_string(), "red".to_string());
|
||||||
css_map.insert("background-image".to_string(), "url('')".to_string());
|
css_map.insert("background-image".to_string(), "url('')".to_string());
|
||||||
assert_eq!(css_map, Readability::inline_css_str_to_map("color: red;background-image: url('')"));
|
assert_eq!(css_map, Readability::inline_css_str_to_map("color: red;background-image: url('')"));
|
||||||
|
|
||||||
|
let empty_map = HashMap::new();
|
||||||
|
assert_eq!(empty_map, Readability::inline_css_str_to_map(" \n \t \r"));
|
||||||
|
assert_eq!(empty_map, Readability::inline_css_str_to_map("color"));
|
||||||
|
|
||||||
|
let mut css_map = HashMap::new();
|
||||||
|
css_map.insert("color".to_string(), "red".to_string());
|
||||||
|
css_map.insert("height".to_string(), "300px".to_string());
|
||||||
|
assert_eq!(
|
||||||
|
css_map,
|
||||||
|
Readability::inline_css_str_to_map("color: red;height: 300px;width")
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Reference in a new issue