commit
474d97c6bd
12 changed files with 1424 additions and 281 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -1,2 +1,3 @@
|
|||
/target
|
||||
*.epub
|
||||
*.epub
|
||||
*.log
|
480
Cargo.lock
generated
480
Cargo.lock
generated
|
@ -126,12 +126,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "async-global-executor"
|
||||
version = "1.4.3"
|
||||
version = "2.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73079b49cd26b8fd5a15f68fc7707fc78698dc2a3d61430f2a7a9430230dfa04"
|
||||
checksum = "9586ec52317f36de58453159d48351bc244bc24ced3effc1fce22f3d48664af6"
|
||||
dependencies = [
|
||||
"async-channel",
|
||||
"async-executor",
|
||||
"async-io",
|
||||
"async-mutex",
|
||||
"blocking",
|
||||
"futures-lite",
|
||||
"num_cpus",
|
||||
"once_cell",
|
||||
|
@ -147,7 +150,7 @@ dependencies = [
|
|||
"fastrand",
|
||||
"futures-lite",
|
||||
"libc",
|
||||
"log 0.4.11",
|
||||
"log 0.4.14",
|
||||
"nb-connect",
|
||||
"once_cell",
|
||||
"parking",
|
||||
|
@ -157,6 +160,15 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-lock"
|
||||
version = "2.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e6a8ea61bf9947a1007c5cada31e647dbc77b103c679858150003ba697ea798b"
|
||||
dependencies = [
|
||||
"event-listener",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-mutex"
|
||||
version = "1.4.0"
|
||||
|
@ -168,14 +180,14 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "async-std"
|
||||
version = "1.7.0"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a7e82538bc65a25dbdff70e4c5439d52f068048ab97cdea0acd73f131594caa1"
|
||||
checksum = "d9f06685bad74e0570f5213741bea82158279a4103d988e57bfada11ad230341"
|
||||
dependencies = [
|
||||
"async-channel",
|
||||
"async-global-executor",
|
||||
"async-io",
|
||||
"async-mutex",
|
||||
"blocking",
|
||||
"async-lock",
|
||||
"crossbeam-utils",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
|
@ -183,11 +195,11 @@ dependencies = [
|
|||
"futures-lite",
|
||||
"gloo-timers",
|
||||
"kv-log-macro",
|
||||
"log 0.4.11",
|
||||
"log 0.4.14",
|
||||
"memchr",
|
||||
"num_cpus",
|
||||
"once_cell",
|
||||
"pin-project-lite 0.1.11",
|
||||
"pin-project-lite 0.2.4",
|
||||
"pin-utils",
|
||||
"slab",
|
||||
"wasm-bindgen-futures",
|
||||
|
@ -394,6 +406,28 @@ dependencies = [
|
|||
"vec_map",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colored"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"lazy_static",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "comfy-table"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17b99e9022e080d384b58d8eaf5976b42a311ff7a9669f8200eb2453c0b2b81a"
|
||||
dependencies = [
|
||||
"crossterm",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "concurrent-queue"
|
||||
version = "1.2.2"
|
||||
|
@ -403,6 +437,21 @@ dependencies = [
|
|||
"cache-padded",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "console"
|
||||
version = "0.14.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3993e6445baa160675931ec041a5e03ca84b9c6e32a056150d3aa2bdda0a1f45"
|
||||
dependencies = [
|
||||
"encode_unicode",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"regex",
|
||||
"terminal_size",
|
||||
"unicode-width",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const_fn"
|
||||
version = "0.4.3"
|
||||
|
@ -453,6 +502,31 @@ dependencies = [
|
|||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossterm"
|
||||
version = "0.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c36c10130df424b2f3552fcc2ddcd9b28a27b1e54b358b45874f88d1ca6888c"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"crossterm_winapi",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"mio",
|
||||
"parking_lot",
|
||||
"signal-hook",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossterm_winapi"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0da8964ace4d3e4a044fd027919b2237000b24315a37c916f61809f1ff2140b9"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-mac"
|
||||
version = "0.10.0"
|
||||
|
@ -490,6 +564,16 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ctor"
|
||||
version = "0.1.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fbaabec2c953050352311293be5c6aba8e141ba19d6811862b232d6fd020484"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ctr"
|
||||
version = "0.6.0"
|
||||
|
@ -530,6 +614,16 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dashmap"
|
||||
version = "4.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"num_cpus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "data-encoding"
|
||||
version = "2.3.1"
|
||||
|
@ -556,6 +650,26 @@ dependencies = [
|
|||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "directories"
|
||||
version = "3.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e69600ff1703123957937708eb27f7a564e48885c537782722ed0ba3189ce1d7"
|
||||
dependencies = [
|
||||
"dirs-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs-sys"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03d86534ed367a67548dc68113a0f5db55432fdfbb6e6f9d77704397d95d5780"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"redox_users",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "discard"
|
||||
version = "1.0.4"
|
||||
|
@ -577,6 +691,12 @@ dependencies = [
|
|||
"dtoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encode_unicode"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.26"
|
||||
|
@ -640,6 +760,22 @@ dependencies = [
|
|||
"miniz_oxide 0.3.7",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flexi_logger"
|
||||
version = "0.17.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "33ab94b6ac8eb69f1496a6993f26f785b5fd6d99b7416023eb2a6175c0b242b1"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"chrono",
|
||||
"glob",
|
||||
"lazy_static",
|
||||
"log 0.4.14",
|
||||
"regex",
|
||||
"thiserror",
|
||||
"yansi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flume"
|
||||
version = "0.9.2"
|
||||
|
@ -685,9 +821,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.3.12"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da9052a1a50244d8d5aa9bf55cbc2fb6f357c86cc52e46c62ed390a7180cf150"
|
||||
checksum = "a9d5813545e459ad3ca1bff9915e9ad7f1a47dc6a91b627ce321d5863b7dd253"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
|
@ -700,9 +836,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-channel"
|
||||
version = "0.3.12"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f2d31b7ec7efab6eefc7c57233bb10b847986139d88cc2f5a02a1ae6871a1846"
|
||||
checksum = "ce79c6a52a299137a6013061e0cf0e688fce5d7f1bc60125f520912fdb29ec25"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
|
@ -710,15 +846,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-core"
|
||||
version = "0.3.12"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "79e5145dde8da7d1b3892dad07a9c98fc04bc39892b1ecc9692cf53e2b780a65"
|
||||
checksum = "098cd1c6dda6ca01650f1a37a794245eb73181d0d4d4e955e2f3c37db7af1815"
|
||||
|
||||
[[package]]
|
||||
name = "futures-executor"
|
||||
version = "0.3.12"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e9e59fdc009a4b3096bf94f740a0f2424c082521f20a9b08c5c07c48d90fd9b9"
|
||||
checksum = "10f6cb7042eda00f0049b1d2080aa4b93442997ee507eb3828e8bd7577f94c9d"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-task",
|
||||
|
@ -727,9 +863,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-io"
|
||||
version = "0.3.12"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "28be053525281ad8259d47e4de5de657b25e7bac113458555bb4b70bc6870500"
|
||||
checksum = "365a1a1fb30ea1c03a830fdb2158f5236833ac81fa0ad12fe35b29cddc35cb04"
|
||||
|
||||
[[package]]
|
||||
name = "futures-lite"
|
||||
|
@ -748,9 +884,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-macro"
|
||||
version = "0.3.12"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c287d25add322d9f9abdcdc5927ca398917996600182178774032e9f8258fedd"
|
||||
checksum = "668c6733a182cd7deb4f1de7ba3bf2120823835b3bcfbeacf7d2c4a773c1bb8b"
|
||||
dependencies = [
|
||||
"proc-macro-hack",
|
||||
"proc-macro2",
|
||||
|
@ -760,24 +896,21 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "futures-sink"
|
||||
version = "0.3.12"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "caf5c69029bda2e743fddd0582d1083951d65cc9539aebf8812f36c3491342d6"
|
||||
checksum = "5c5629433c555de3d82861a7a4e3794a4c40040390907cfbfd7143a92a426c23"
|
||||
|
||||
[[package]]
|
||||
name = "futures-task"
|
||||
version = "0.3.12"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13de07eb8ea81ae445aca7b69f5f7bf15d7bf4912d8ca37d6645c77ae8a58d86"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
]
|
||||
checksum = "ba7aa51095076f3ba6d9a1f702f74bd05ec65f555d70d2033d55ba8d69f581bc"
|
||||
|
||||
[[package]]
|
||||
name = "futures-util"
|
||||
version = "0.3.12"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "632a8cd0f2a4b3fdea1657f08bde063848c3bd00f9bbf6e256b8be78802e624b"
|
||||
checksum = "3c144ad54d60f23927f0a6b6d816e4271278b64f005ad65e4e35291d2de9c025"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
|
@ -823,6 +956,17 @@ dependencies = [
|
|||
"wasi 0.9.0+wasi-snapshot-preview1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"libc",
|
||||
"wasi 0.10.0+wasi-snapshot-preview1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ghash"
|
||||
version = "0.3.0"
|
||||
|
@ -838,6 +982,12 @@ version = "0.23.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6503fe142514ca4799d4c26297c4248239fe8838d827db6bd6065c6ed29a6ce"
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||
|
||||
[[package]]
|
||||
name = "gloo-timers"
|
||||
version = "0.2.1"
|
||||
|
@ -851,6 +1001,15 @@ dependencies = [
|
|||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac"
|
||||
dependencies = [
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.17"
|
||||
|
@ -895,7 +1054,7 @@ version = "0.25.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b"
|
||||
dependencies = [
|
||||
"log 0.4.11",
|
||||
"log 0.4.14",
|
||||
"mac",
|
||||
"markup5ever",
|
||||
"proc-macro2",
|
||||
|
@ -916,15 +1075,17 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "http-client"
|
||||
version = "6.2.0"
|
||||
version = "6.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "010092b71b94ee49293995625ce7a607778b8b4099c8088fa84fd66bd3e0f21c"
|
||||
checksum = "5566ecc26bc6b04e773e680d66141fced78e091ad818e420d726c152b05a64ff"
|
||||
dependencies = [
|
||||
"async-std",
|
||||
"async-trait",
|
||||
"cfg-if 1.0.0",
|
||||
"dashmap",
|
||||
"http-types",
|
||||
"isahc",
|
||||
"log 0.4.11",
|
||||
"log 0.4.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -960,6 +1121,18 @@ dependencies = [
|
|||
"unicode-normalization",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indicatif"
|
||||
version = "0.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7baab56125e25686df467fe470785512329883aab42696d661247aca2a2896e4"
|
||||
dependencies = [
|
||||
"console",
|
||||
"lazy_static",
|
||||
"number_prefix",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "infer"
|
||||
version = "0.2.3"
|
||||
|
@ -988,7 +1161,7 @@ dependencies = [
|
|||
"flume",
|
||||
"futures-lite",
|
||||
"http",
|
||||
"log 0.4.11",
|
||||
"log 0.4.14",
|
||||
"once_cell",
|
||||
"slab",
|
||||
"sluice",
|
||||
|
@ -1031,7 +1204,7 @@ version = "1.0.7"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f"
|
||||
dependencies = [
|
||||
"log 0.4.11",
|
||||
"log 0.4.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1042,9 +1215,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.80"
|
||||
version = "0.2.93"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614"
|
||||
checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
|
||||
|
||||
[[package]]
|
||||
name = "libnghttp2-sys"
|
||||
|
@ -1083,16 +1256,17 @@ version = "0.3.9"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
|
||||
dependencies = [
|
||||
"log 0.4.11",
|
||||
"log 0.4.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.11"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
|
||||
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.10",
|
||||
"cfg-if 1.0.0",
|
||||
"value-bag",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1107,7 +1281,7 @@ version = "0.10.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aae38d669396ca9b707bfc3db254bc382ddb94f57cc5c235f34623a669a01dab"
|
||||
dependencies = [
|
||||
"log 0.4.11",
|
||||
"log 0.4.14",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
"serde",
|
||||
|
@ -1171,6 +1345,28 @@ dependencies = [
|
|||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.7.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf80d3e903b34e0bd7282b218398aec54e082c840d9baf8339e0080a0c542956"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log 0.4.14",
|
||||
"miow",
|
||||
"ntapi",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miow"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mustache"
|
||||
version = "0.9.0"
|
||||
|
@ -1203,6 +1399,15 @@ version = "0.1.14"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
|
||||
|
||||
[[package]]
|
||||
name = "ntapi"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.44"
|
||||
|
@ -1232,6 +1437,12 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "number_prefix"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17b02fc0ff9a9e4b35b3342880f48e896ebf69f2967921fe8646bf5b7125956a"
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.22.0"
|
||||
|
@ -1271,18 +1482,26 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "paperoni"
|
||||
version = "0.3.0-alpha1"
|
||||
version = "0.4.0-alpha1"
|
||||
dependencies = [
|
||||
"async-std",
|
||||
"chrono",
|
||||
"clap",
|
||||
"colored",
|
||||
"comfy-table",
|
||||
"directories",
|
||||
"epub-builder",
|
||||
"flexi_logger",
|
||||
"futures",
|
||||
"html5ever",
|
||||
"indicatif",
|
||||
"kuchiki",
|
||||
"lazy_static",
|
||||
"log 0.4.14",
|
||||
"md5",
|
||||
"regex",
|
||||
"surf",
|
||||
"thiserror",
|
||||
"url",
|
||||
]
|
||||
|
||||
|
@ -1292,6 +1511,31 @@ version = "2.0.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72"
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb"
|
||||
dependencies = [
|
||||
"instant",
|
||||
"lock_api",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot_core"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"instant",
|
||||
"libc",
|
||||
"redox_syscall 0.2.6",
|
||||
"smallvec",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "percent-encoding"
|
||||
version = "2.1.0"
|
||||
|
@ -1404,7 +1648,7 @@ checksum = "a2a7bc6b2a29e632e45451c941832803a18cce6781db04de8a04696cdca8bde4"
|
|||
dependencies = [
|
||||
"cfg-if 0.1.10",
|
||||
"libc",
|
||||
"log 0.4.11",
|
||||
"log 0.4.14",
|
||||
"wepoll-sys",
|
||||
"winapi",
|
||||
]
|
||||
|
@ -1480,7 +1724,7 @@ version = "0.7.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"getrandom 0.1.15",
|
||||
"libc",
|
||||
"rand_chacha",
|
||||
"rand_core 0.5.1",
|
||||
|
@ -1519,7 +1763,7 @@ version = "0.5.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"getrandom 0.1.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1556,22 +1800,40 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.4.2"
|
||||
name = "redox_syscall"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c"
|
||||
checksum = "8270314b5ccceb518e7e578952f0b72b88222d02e8f77f5ecf7abbb673539041"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_users"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64"
|
||||
dependencies = [
|
||||
"getrandom 0.2.2",
|
||||
"redox_syscall 0.2.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "957056ecddbeba1b26965114e191d2e8589ce74db242b6ea25fc4062427a5c19"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.21"
|
||||
version = "0.6.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189"
|
||||
checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548"
|
||||
|
||||
[[package]]
|
||||
name = "remove_dir_all"
|
||||
|
@ -1629,7 +1891,7 @@ dependencies = [
|
|||
"cssparser",
|
||||
"derive_more",
|
||||
"fxhash",
|
||||
"log 0.4.11",
|
||||
"log 0.4.14",
|
||||
"matches",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
|
@ -1738,6 +2000,26 @@ dependencies = [
|
|||
"opaque-debug",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook"
|
||||
version = "0.1.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7e31d442c16f047a671b5a71e2161d6e68814012b7f5379d269ebd915fac2729"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"mio",
|
||||
"signal-hook-registry",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "16f1d0fef1604ba8f7a073c7e701f213e056707210e9020af4528e0101ce11a6"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.3"
|
||||
|
@ -1763,9 +2045,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.5.0"
|
||||
version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7acad6f34eb9e8a259d3283d1e8c1d34d7415943d4895f65cc73813c7396fc85"
|
||||
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
|
@ -1775,7 +2057,7 @@ checksum = "2c29947abdee2a218277abeca306f25789c938e500ea5a9d4b12a5a504466902"
|
|||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
"redox_syscall 0.1.57",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
|
@ -1883,6 +2165,24 @@ version = "0.8.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7318c509b5ba57f18533982607f24070a55d353e90d4cae30c467cdb2ad5ac5c"
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.20.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee8bc6b87a5112aeeab1f4a9f7ab634fe6cbefc4850006df31267f4cfb9e3149"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "subtle"
|
||||
version = "2.3.0"
|
||||
|
@ -1891,21 +2191,21 @@ checksum = "343f3f510c2915908f155e94f17220b19ccfacf2a64a2a5d8004f2c3e311e7fd"
|
|||
|
||||
[[package]]
|
||||
name = "surf"
|
||||
version = "2.1.0"
|
||||
version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7189c787d96fe18fef704950de76d590022d9d70858a4a201e1f07a0666882ea"
|
||||
checksum = "2a154d33ca6b5e1fe6fd1c760e5a5cc1202425f6cca2e13229f16a69009f6328"
|
||||
dependencies = [
|
||||
"async-std",
|
||||
"async-trait",
|
||||
"cfg-if 0.1.10",
|
||||
"cfg-if 1.0.0",
|
||||
"encoding_rs",
|
||||
"futures-util",
|
||||
"http-client",
|
||||
"http-types",
|
||||
"log 0.4.11",
|
||||
"log 0.4.14",
|
||||
"mime_guess",
|
||||
"once_cell",
|
||||
"pin-project-lite 0.1.11",
|
||||
"pin-project-lite 0.2.4",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"web-sys",
|
||||
|
@ -1943,6 +2243,16 @@ dependencies = [
|
|||
"utf-8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "terminal_size"
|
||||
version = "0.1.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "86ca8ced750734db02076f44132d802af0b33b09942331f4459dde8636fd2406"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "textwrap"
|
||||
version = "0.11.0"
|
||||
|
@ -1960,33 +2270,24 @@ checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
|
|||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.22"
|
||||
version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e9ae34b84616eedaaf1e9dd6026dbe00dcafa92aa0c8077cb69df1fcfe5e53e"
|
||||
checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.22"
|
||||
version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ba20f23e85b10754cd195504aebf6a27e2e6cbe28c17778a0c930724628dd56"
|
||||
checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.44"
|
||||
|
@ -2058,7 +2359,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "b0987850db3733619253fe60e17cb59b82d37c7e6c0236bb81e4d6b87c879f27"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.10",
|
||||
"log 0.4.11",
|
||||
"log 0.4.14",
|
||||
"pin-project-lite 0.1.11",
|
||||
"tracing-attributes",
|
||||
"tracing-core",
|
||||
|
@ -2127,6 +2428,12 @@ dependencies = [
|
|||
"tinyvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-segmentation"
|
||||
version = "1.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.8"
|
||||
|
@ -2151,9 +2458,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "2.2.0"
|
||||
version = "2.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5909f2b0817350449ed73e8bcd81c8c3c8d9a7a5d8acba4b27db277f1868976e"
|
||||
checksum = "9ccd964113622c8e9322cfac19eb1004a07e636c545f325da085d5cdde6f1f8b"
|
||||
dependencies = [
|
||||
"form_urlencoded",
|
||||
"idna",
|
||||
|
@ -2183,6 +2490,15 @@ dependencies = [
|
|||
"rand 0.7.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "value-bag"
|
||||
version = "1.0.0-alpha.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b676010e055c99033117c2343b33a40a30b91fecd6c49055ac9cd2d6c305ab1"
|
||||
dependencies = [
|
||||
"ctor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.10"
|
||||
|
@ -2243,7 +2559,7 @@ checksum = "f22b422e2a757c35a73774860af8e112bff612ce6cb604224e8e47641a9e4f68"
|
|||
dependencies = [
|
||||
"bumpalo",
|
||||
"lazy_static",
|
||||
"log 0.4.11",
|
||||
"log 0.4.14",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
|
@ -2332,6 +2648,12 @@ version = "0.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "yansi"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9fc79f4a1e39857fc00c3f662cbf2651c771f00e9c15fe2abc341806bd46bd71"
|
||||
|
||||
[[package]]
|
||||
name = "zip"
|
||||
version = "0.5.8"
|
||||
|
|
21
Cargo.toml
21
Cargo.toml
|
@ -3,7 +3,7 @@ description = "A web article downloader"
|
|||
homepage = "https://github.com/hipstermojo/paperoni"
|
||||
repository = "https://github.com/hipstermojo/paperoni"
|
||||
name = "paperoni"
|
||||
version = "0.3.0-alpha1"
|
||||
version = "0.4.0-alpha1"
|
||||
authors = ["Kenneth Gitere <gitere81@gmail.com>"]
|
||||
edition = "2018"
|
||||
license = "MIT"
|
||||
|
@ -12,14 +12,23 @@ readme = "README.md"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
async-std = "1.7.0"
|
||||
async-std = "1.9.0"
|
||||
# atty = "0.2.14"
|
||||
chrono = "0.4.19"
|
||||
clap = "2.33.3"
|
||||
colored = "2.0.0"
|
||||
comfy-table = "2.1.0"
|
||||
directories = "3.0.2"
|
||||
epub-builder = "0.4.8"
|
||||
futures = "0.3.12"
|
||||
flexi_logger = "0.17.1"
|
||||
futures = "0.3.14"
|
||||
html5ever = "0.25.1"
|
||||
indicatif = "0.15.0"
|
||||
kuchiki = "0.8.1"
|
||||
lazy_static = "1.4.0"
|
||||
log = "0.4.14"
|
||||
md5 = "0.7.0"
|
||||
regex = "1.4.2"
|
||||
surf = "2.1.0"
|
||||
url = "2.2.0"
|
||||
regex = "1.4.5"
|
||||
surf = "2.2.0"
|
||||
thiserror = "1.0.24"
|
||||
url = "2.2.1"
|
||||
|
|
44
README.md
44
README.md
|
@ -1,8 +1,10 @@
|
|||
![crates.io](https://img.shields.io/crates/v/paperoni.svg)
|
||||
|
||||
<p align="center"><img src="./paperoni-dark.png"></p>
|
||||
|
||||
<p align="center"><i>Salami not included</i></p>
|
||||
|
||||
Paperoni is a web article downloader written in Rust. The downloaded articles are then exported as EPUB files.
|
||||
Paperoni is a CLI tool made in Rust for downloading web articles as EPUBs.
|
||||
|
||||
> This project is in an alpha release so it might crash when you use it. Please open an [issue on Github](https://github.com/hipstermojo/paperoni/issues/new) if it does crash.
|
||||
|
||||
|
@ -17,7 +19,7 @@ Check the [releases](https://github.com/hipstermojo/paperoni/releases) page for
|
|||
Paperoni is published on [crates.io](https://crates.io). If you have [cargo](https://github.com/rust-lang/cargo) installed, then run:
|
||||
|
||||
```sh
|
||||
cargo install paperoni --version 0.3.0-alpha1
|
||||
cargo install paperoni --version 0.4.0-alpha1
|
||||
```
|
||||
|
||||
_Paperoni is still in alpha so the `version` flag has to be passed._
|
||||
|
@ -37,6 +39,27 @@ cargo run -- # pass your url here
|
|||
|
||||
## Usage
|
||||
|
||||
```
|
||||
USAGE:
|
||||
paperoni [OPTIONS] [urls]...
|
||||
|
||||
OPTIONS:
|
||||
-f, --file <file> Input file containing links
|
||||
-h, --help Prints help information
|
||||
--log-to-file Enables logging of events to a file located in .paperoni/logs with a default log level
|
||||
of debug. Use -v to specify the logging level
|
||||
--max_conn <max_conn> The maximum number of concurrent HTTP connections when downloading articles. Default is
|
||||
8
|
||||
--merge <output_name> Merge multiple articles into a single epub
|
||||
-V, --version Prints version information
|
||||
-v Enables logging of events and set the verbosity level. Use -h to read on its usage
|
||||
|
||||
ARGS:
|
||||
<urls>... Urls of web articles
|
||||
```
|
||||
|
||||
To download a single article pass in its URL
|
||||
|
||||
```sh
|
||||
paperoni https://en.wikipedia.org/wiki/Pepperoni
|
||||
```
|
||||
|
@ -68,10 +91,23 @@ into a single epub using the `merge` flag and specifying the output file.
|
|||
paperoni -f links.txt --merge out.epub
|
||||
```
|
||||
|
||||
### Logging events
|
||||
|
||||
Logging is disabled by default. This can be activated by either using the `-v` flag or `--log-to-file` flag. If the `--log-to-file` flag is passed the logs are sent to a file in the default Paperoni directory `.paperoni/logs` which is on your home directory. The `-v` flag configures the verbosity levels such that:
|
||||
|
||||
```
|
||||
-v Logs only the error level
|
||||
-vv Logs only the warn level
|
||||
-vvv Logs only the info level
|
||||
-vvvv Logs only the debug level
|
||||
```
|
||||
|
||||
If only the `-v` flag is passed, the progress bars are disabled. If both `-v` and `--log-to-file` are passed then the progress bars will still be shown.
|
||||
|
||||
## How it works
|
||||
|
||||
The URL passed to Paperoni is fetched and the returned HTML response is passed to the extractor.
|
||||
This extractor retrieves a possible article using a port of the [Mozilla Readability algorithm](https://github.com/mozilla/readability). This article is then saved in an EPUB.
|
||||
This extractor retrieves a possible article using a [custom port](https://github.com/hipstermojo/paperoni/blob/master/src/moz_readability/mod.rs) of the [Mozilla Readability algorithm](https://github.com/mozilla/readability). This article is then saved in an EPUB.
|
||||
|
||||
> The port of the algorithm is still unstable as well so it is not fully compatible with all the websites that can be extracted using Readability.
|
||||
|
||||
|
@ -82,3 +118,5 @@ This program is still in alpha so a number of things won't work:
|
|||
- Websites that only run with JavaScript cannot be extracted.
|
||||
- Website articles that cannot be extracted by Readability cannot be extracted by Paperoni either.
|
||||
- Code snippets on Medium articles that are lazy loaded will not appear in the EPUB.
|
||||
|
||||
There are also web pages it won't work on in general such as Twitter and Reddit threads.
|
||||
|
|
105
src/cli.rs
105
src/cli.rs
|
@ -1,6 +1,10 @@
|
|||
use std::{fs::File, io::Read};
|
||||
use std::{fs::File, io::Read, path::Path};
|
||||
|
||||
use chrono::{DateTime, Local};
|
||||
use clap::{App, AppSettings, Arg};
|
||||
use flexi_logger::LevelFilter as LogLevel;
|
||||
|
||||
use crate::logs::init_logger;
|
||||
|
||||
pub fn cli_init() -> AppConfig {
|
||||
let app = App::new("paperoni")
|
||||
|
@ -8,12 +12,9 @@ pub fn cli_init() -> AppConfig {
|
|||
AppSettings::ArgRequiredElseHelp,
|
||||
AppSettings::UnifiedHelpMessage,
|
||||
])
|
||||
.version("0.3.0-alpha1")
|
||||
.version(clap::crate_version!())
|
||||
.about(
|
||||
"
|
||||
Paperoni is an article downloader.
|
||||
It takes a url and downloads the article content from it and saves it to an epub.
|
||||
",
|
||||
"Paperoni is a CLI tool made in Rust for downloading web articles as EPUBs",
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("urls")
|
||||
|
@ -38,8 +39,29 @@ It takes a url and downloads the article content from it and saves it to an epub
|
|||
.long("max_conn")
|
||||
.help("The maximum number of concurrent HTTP connections when downloading articles. Default is 8")
|
||||
.long_help("The maximum number of concurrent HTTP connections when downloading articles. Default is 8.\nNOTE: It is advised to use as few connections as needed i.e between 1 and 50. Using more connections can end up overloading your network card with too many concurrent requests.")
|
||||
.takes_value(true));
|
||||
.takes_value(true))
|
||||
.arg(
|
||||
Arg::with_name("verbosity")
|
||||
.short("v")
|
||||
.multiple(true)
|
||||
.help("Enables logging of events and set the verbosity level. Use --help to read on its usage")
|
||||
.long_help(
|
||||
"This takes upto 4 levels of verbosity in the following order.
|
||||
- Error (-v)
|
||||
- Warn (-vv)
|
||||
- Info (-vvv)
|
||||
- Debug (-vvvv)
|
||||
When this flag is passed, it disables the progress bars and logs to stderr.
|
||||
If you would like to send the logs to a file (and enable progress bars), pass the log-to-file flag."
|
||||
)
|
||||
.takes_value(false))
|
||||
.arg(
|
||||
Arg::with_name("log-to-file")
|
||||
.long("log-to-file")
|
||||
.help("Enables logging of events to a file located in .paperoni/logs with a default log level of debug. Use -v to specify the logging level")
|
||||
.takes_value(false));
|
||||
let arg_matches = app.get_matches();
|
||||
|
||||
let mut urls: Vec<String> = match arg_matches.value_of("file") {
|
||||
Some(file_name) => {
|
||||
if let Ok(mut file) = File::open(file_name) {
|
||||
|
@ -76,14 +98,51 @@ It takes a url and downloads the article content from it and saves it to an epub
|
|||
|
||||
let mut app_config = AppConfig::new(max_conn);
|
||||
app_config.set_urls(urls);
|
||||
|
||||
if let Some(name) = arg_matches.value_of("output_name") {
|
||||
let file_name = if name.ends_with(".epub") && name.len() > 5 {
|
||||
let file_path = Path::new(name);
|
||||
if file_path.is_dir() {
|
||||
eprintln!("{:?} is a directory", name);
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
let file_name = if file_path.extension().is_some() {
|
||||
name.to_owned()
|
||||
} else {
|
||||
name.to_owned() + ".epub"
|
||||
};
|
||||
app_config.set_merged(file_name);
|
||||
|
||||
match std::fs::File::create(&file_name) {
|
||||
Ok(_) => (),
|
||||
Err(e) => {
|
||||
eprintln!("Unable to create file {:?}\n{}", file_path, e);
|
||||
std::process::exit(1)
|
||||
}
|
||||
}
|
||||
app_config.merged = Some(file_name);
|
||||
}
|
||||
|
||||
if arg_matches.is_present("verbosity") {
|
||||
if !arg_matches.is_present("log-to-file") {
|
||||
app_config.can_disable_progress_bar = true;
|
||||
}
|
||||
let log_levels: [LogLevel; 5] = [
|
||||
LogLevel::Off,
|
||||
LogLevel::Error,
|
||||
LogLevel::Warn,
|
||||
LogLevel::Info,
|
||||
LogLevel::Debug,
|
||||
];
|
||||
let level = arg_matches.occurrences_of("verbosity").clamp(0, 4) as usize;
|
||||
app_config.log_level = log_levels[level];
|
||||
}
|
||||
if arg_matches.is_present("log-to-file") {
|
||||
app_config.log_level = LogLevel::Debug;
|
||||
app_config.is_logging_to_file = true;
|
||||
}
|
||||
|
||||
init_logger(&app_config);
|
||||
|
||||
app_config
|
||||
}
|
||||
|
||||
|
@ -91,6 +150,10 @@ pub struct AppConfig {
|
|||
urls: Vec<String>,
|
||||
max_conn: usize,
|
||||
merged: Option<String>,
|
||||
log_level: LogLevel,
|
||||
can_disable_progress_bar: bool,
|
||||
start_time: DateTime<Local>,
|
||||
is_logging_to_file: bool,
|
||||
}
|
||||
|
||||
impl AppConfig {
|
||||
|
@ -99,6 +162,10 @@ impl AppConfig {
|
|||
urls: vec![],
|
||||
max_conn,
|
||||
merged: None,
|
||||
log_level: LogLevel::Off,
|
||||
can_disable_progress_bar: false,
|
||||
start_time: Local::now(),
|
||||
is_logging_to_file: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -106,10 +173,6 @@ impl AppConfig {
|
|||
self.urls.extend(urls);
|
||||
}
|
||||
|
||||
fn set_merged(&mut self, name: String) {
|
||||
self.merged = Some(name);
|
||||
}
|
||||
|
||||
pub fn urls(&self) -> &Vec<String> {
|
||||
&self.urls
|
||||
}
|
||||
|
@ -120,4 +183,20 @@ impl AppConfig {
|
|||
pub fn merged(&self) -> Option<&String> {
|
||||
self.merged.as_ref()
|
||||
}
|
||||
|
||||
pub fn log_level(&self) -> LogLevel {
|
||||
self.log_level
|
||||
}
|
||||
|
||||
pub fn can_disable_progress_bar(&self) -> bool {
|
||||
self.can_disable_progress_bar
|
||||
}
|
||||
|
||||
pub fn start_time(&self) -> &DateTime<Local> {
|
||||
&self.start_time
|
||||
}
|
||||
|
||||
pub fn is_logging_to_file(&self) -> bool {
|
||||
self.is_logging_to_file
|
||||
}
|
||||
}
|
||||
|
|
257
src/epub.rs
257
src/epub.rs
|
@ -1,32 +1,159 @@
|
|||
use std::fs::File;
|
||||
|
||||
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
|
||||
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use log::{debug, info};
|
||||
|
||||
use crate::extractor::{self, Extractor};
|
||||
use crate::{
|
||||
cli::AppConfig,
|
||||
errors::PaperoniError,
|
||||
extractor::{self, Extractor},
|
||||
};
|
||||
|
||||
pub fn generate_epubs(articles: Vec<Extractor>, merged: Option<&String>) {
|
||||
match merged {
|
||||
pub fn generate_epubs(
|
||||
articles: Vec<Extractor>,
|
||||
app_config: &AppConfig,
|
||||
successful_articles_table: &mut Table,
|
||||
) -> Result<(), Vec<PaperoniError>> {
|
||||
let bar = if app_config.can_disable_progress_bar() {
|
||||
ProgressBar::hidden()
|
||||
} else {
|
||||
let enabled_bar = ProgressBar::new(articles.len() as u64);
|
||||
let style = ProgressStyle::default_bar().template(
|
||||
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} epub {pos}/{len:7} {msg:.green}",
|
||||
);
|
||||
enabled_bar.set_style(style);
|
||||
if !articles.is_empty() {
|
||||
enabled_bar.set_message("Generating epubs");
|
||||
}
|
||||
enabled_bar
|
||||
};
|
||||
|
||||
let mut errors: Vec<PaperoniError> = Vec::new();
|
||||
|
||||
match app_config.merged() {
|
||||
Some(name) => {
|
||||
let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
||||
successful_articles_table.set_header(vec![Cell::new("Table of Contents")
|
||||
.add_attribute(Attribute::Bold)
|
||||
.set_alignment(CellAlignment::Center)
|
||||
.fg(Color::Green)]);
|
||||
|
||||
let mut epub = match EpubBuilder::new(match ZipLibrary::new() {
|
||||
Ok(zip_library) => zip_library,
|
||||
Err(err) => {
|
||||
let mut paperoni_err: PaperoniError = err.into();
|
||||
paperoni_err.set_article_source(name);
|
||||
errors.push(paperoni_err);
|
||||
return Err(errors);
|
||||
}
|
||||
}) {
|
||||
Ok(epub) => epub,
|
||||
Err(err) => {
|
||||
let mut paperoni_err: PaperoniError = err.into();
|
||||
paperoni_err.set_article_source(name);
|
||||
errors.push(paperoni_err);
|
||||
return Err(errors);
|
||||
}
|
||||
};
|
||||
debug!("Creating {:?}", name);
|
||||
epub.inline_toc();
|
||||
epub = articles
|
||||
articles
|
||||
.iter()
|
||||
.enumerate()
|
||||
.fold(epub, |mut epub, (idx, article)| {
|
||||
.fold(&mut epub, |epub, (idx, article)| {
|
||||
let mut article_result = || -> Result<(), PaperoniError> {
|
||||
let mut html_buf = Vec::new();
|
||||
extractor::serialize_to_xhtml(article.article(), &mut html_buf)?;
|
||||
let html_str = std::str::from_utf8(&html_buf)?;
|
||||
epub.metadata("title", replace_metadata_value(name))?;
|
||||
let section_name = article.metadata().title();
|
||||
epub.add_content(
|
||||
EpubContent::new(format!("article_{}.xhtml", idx), html_str.as_bytes())
|
||||
.title(replace_metadata_value(section_name)),
|
||||
)?;
|
||||
info!("Adding images for {:?}", name);
|
||||
article.img_urls.iter().for_each(|img| {
|
||||
// TODO: Add error handling and return errors as a vec
|
||||
let mut file_path = std::env::temp_dir();
|
||||
file_path.push(&img.0);
|
||||
|
||||
let img_buf = File::open(&file_path).expect("Can't read file");
|
||||
epub.add_resource(
|
||||
file_path.file_name().unwrap(),
|
||||
img_buf,
|
||||
img.1.as_ref().unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
});
|
||||
info!("Added images for {:?}", name);
|
||||
Ok(())
|
||||
};
|
||||
if let Err(mut error) = article_result() {
|
||||
error.set_article_source(&article.url);
|
||||
errors.push(error);
|
||||
}
|
||||
bar.inc(1);
|
||||
successful_articles_table.add_row(vec![article.metadata().title()]);
|
||||
epub
|
||||
});
|
||||
let appendix = generate_appendix(articles.iter().collect());
|
||||
if let Err(err) = epub.add_content(
|
||||
EpubContent::new("appendix.xhtml", appendix.as_bytes())
|
||||
.title(replace_metadata_value("Article Sources")),
|
||||
) {
|
||||
let mut paperoni_err: PaperoniError = err.into();
|
||||
paperoni_err.set_article_source(name);
|
||||
errors.push(paperoni_err);
|
||||
return Err(errors);
|
||||
}
|
||||
|
||||
let mut out_file = File::create(&name).unwrap();
|
||||
match epub.generate(&mut out_file) {
|
||||
Ok(_) => (),
|
||||
Err(err) => {
|
||||
let mut paperoni_err: PaperoniError = err.into();
|
||||
paperoni_err.set_article_source(name);
|
||||
errors.push(paperoni_err);
|
||||
return Err(errors);
|
||||
}
|
||||
}
|
||||
|
||||
bar.finish_with_message("Generated epub\n");
|
||||
debug!("Created {:?}", name);
|
||||
println!("Created {:?}", name);
|
||||
}
|
||||
None => {
|
||||
successful_articles_table
|
||||
.set_header(vec![Cell::new("Downloaded articles")
|
||||
.add_attribute(Attribute::Bold)
|
||||
.set_alignment(CellAlignment::Center)
|
||||
.fg(Color::Green)])
|
||||
.set_content_arrangement(ContentArrangement::Dynamic);
|
||||
|
||||
for article in &articles {
|
||||
let mut result = || -> Result<(), PaperoniError> {
|
||||
let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;
|
||||
let file_name = format!(
|
||||
"{}.epub",
|
||||
article
|
||||
.metadata()
|
||||
.title()
|
||||
.replace("/", " ")
|
||||
.replace("\\", " ")
|
||||
);
|
||||
debug!("Creating {:?}", file_name);
|
||||
let mut out_file = File::create(&file_name).unwrap();
|
||||
let mut html_buf = Vec::new();
|
||||
extractor::serialize_to_xhtml(article.article().unwrap(), &mut html_buf)
|
||||
extractor::serialize_to_xhtml(article.article(), &mut html_buf)
|
||||
.expect("Unable to serialize to xhtml");
|
||||
let html_str = std::str::from_utf8(&html_buf).unwrap();
|
||||
epub.metadata("title", replace_metadata_value(name))
|
||||
.unwrap();
|
||||
let section_name = article.metadata().title();
|
||||
epub.add_content(
|
||||
EpubContent::new(format!("article_{}.xhtml", idx), html_str.as_bytes())
|
||||
.title(replace_metadata_value(section_name)),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
article.img_urls.iter().for_each(|img| {
|
||||
if let Some(author) = article.metadata().byline() {
|
||||
epub.metadata("author", replace_metadata_value(author))?;
|
||||
}
|
||||
epub.metadata("title", replace_metadata_value(article.metadata().title()))?;
|
||||
epub.add_content(EpubContent::new("index.xhtml", html_str.as_bytes()))?;
|
||||
for img in &article.img_urls {
|
||||
let mut file_path = std::env::temp_dir();
|
||||
file_path.push(&img.0);
|
||||
|
||||
|
@ -35,52 +162,35 @@ pub fn generate_epubs(articles: Vec<Extractor>, merged: Option<&String>) {
|
|||
file_path.file_name().unwrap(),
|
||||
img_buf,
|
||||
img.1.as_ref().unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
});
|
||||
epub
|
||||
});
|
||||
let mut out_file = File::create(&name).unwrap();
|
||||
epub.generate(&mut out_file).unwrap();
|
||||
println!("Created {:?}", name);
|
||||
}
|
||||
None => {
|
||||
for article in articles {
|
||||
let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
||||
let file_name = format!(
|
||||
"{}.epub",
|
||||
article
|
||||
.metadata()
|
||||
.title()
|
||||
.replace("/", " ")
|
||||
.replace("\\", " ")
|
||||
);
|
||||
let mut out_file = File::create(&file_name).unwrap();
|
||||
let mut html_buf = Vec::new();
|
||||
extractor::serialize_to_xhtml(article.article().unwrap(), &mut html_buf)
|
||||
.expect("Unable to serialize to xhtml");
|
||||
let html_str = std::str::from_utf8(&html_buf).unwrap();
|
||||
if let Some(author) = article.metadata().byline() {
|
||||
epub.metadata("author", replace_metadata_value(author))
|
||||
.unwrap();
|
||||
}
|
||||
epub.metadata("title", replace_metadata_value(article.metadata().title()))
|
||||
.unwrap();
|
||||
epub.add_content(EpubContent::new("index.xhtml", html_str.as_bytes()))
|
||||
.unwrap();
|
||||
for img in article.img_urls {
|
||||
let mut file_path = std::env::temp_dir();
|
||||
file_path.push(&img.0);
|
||||
)?;
|
||||
}
|
||||
let appendix = generate_appendix(vec![&article]);
|
||||
epub.add_content(
|
||||
EpubContent::new("appendix.xhtml", appendix.as_bytes())
|
||||
.title(replace_metadata_value("Article Source")),
|
||||
)?;
|
||||
epub.generate(&mut out_file)?;
|
||||
bar.inc(1);
|
||||
|
||||
let img_buf = File::open(&file_path).expect("Can't read file");
|
||||
epub.add_resource(file_path.file_name().unwrap(), img_buf, img.1.unwrap())
|
||||
.unwrap();
|
||||
successful_articles_table.add_row(vec![article.metadata().title()]);
|
||||
|
||||
debug!("Created {:?}", file_name);
|
||||
Ok(())
|
||||
};
|
||||
if let Err(mut error) = result() {
|
||||
error.set_article_source(&article.url);
|
||||
errors.push(error);
|
||||
}
|
||||
epub.generate(&mut out_file).unwrap();
|
||||
println!("Created {:?}", file_name);
|
||||
}
|
||||
bar.finish_with_message("Generated epubs\n");
|
||||
}
|
||||
}
|
||||
|
||||
if errors.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(errors)
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces characters that have to be escaped before adding to the epub's metadata
|
||||
|
@ -91,6 +201,37 @@ fn replace_metadata_value(value: &str) -> String {
|
|||
.replace(">", ">")
|
||||
}
|
||||
|
||||
//TODO: The type signature of the argument should change as it requires that merged articles create an entirely new Vec of references
|
||||
fn generate_appendix(articles: Vec<&Extractor>) -> String {
|
||||
let link_tags: String = articles
|
||||
.iter()
|
||||
.map(|article| {
|
||||
let article_name = if !article.metadata().title().is_empty() {
|
||||
article.metadata().title()
|
||||
} else {
|
||||
&article.url
|
||||
};
|
||||
format!(
|
||||
"<a href=\"{}\">{}</a><br></br>",
|
||||
replace_metadata_value(&article.url),
|
||||
replace_metadata_value(article_name)
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
let template = format!(
|
||||
r#"<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
|
||||
<head>
|
||||
</head>
|
||||
<body>
|
||||
<h2>Appendix</h2><h3>Article sources</h3>
|
||||
{}
|
||||
</body>
|
||||
</html>"#,
|
||||
link_tags
|
||||
);
|
||||
template
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::replace_metadata_value;
|
||||
|
|
126
src/errors.rs
Normal file
126
src/errors.rs
Normal file
|
@ -0,0 +1,126 @@
|
|||
use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum ErrorKind {
|
||||
#[error("[EpubError]: {0}")]
|
||||
EpubError(String),
|
||||
#[error("[HTTPError]: {0}")]
|
||||
HTTPError(String),
|
||||
#[error("[IOError]: {0}")]
|
||||
IOError(String),
|
||||
#[error("[UTF8Error]: {0}")]
|
||||
UTF8Error(String),
|
||||
#[error("[ReadabilityError]: {0}")]
|
||||
ReadabilityError(String),
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
#[error("{kind}")]
|
||||
/// Used to represent errors from downloading images. Errors from here are used solely for debugging
|
||||
/// as they are considered recoverable.
|
||||
pub struct ImgError {
|
||||
kind: ErrorKind,
|
||||
url: Option<String>,
|
||||
}
|
||||
|
||||
impl ImgError {
|
||||
pub fn with_kind(kind: ErrorKind) -> Self {
|
||||
ImgError { url: None, kind }
|
||||
}
|
||||
|
||||
pub fn set_url(&mut self, url: &str) {
|
||||
self.url = Some(url.to_string());
|
||||
}
|
||||
|
||||
pub fn url(&self) -> &Option<String> {
|
||||
&self.url
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ErrorKind> for ImgError {
|
||||
fn from(kind: ErrorKind) -> Self {
|
||||
ImgError::with_kind(kind)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<surf::Error> for ImgError {
|
||||
fn from(err: surf::Error) -> Self {
|
||||
ImgError::with_kind(ErrorKind::HTTPError(err.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<url::ParseError> for ImgError {
|
||||
fn from(err: url::ParseError) -> Self {
|
||||
ImgError::with_kind(ErrorKind::HTTPError(err.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for ImgError {
|
||||
fn from(err: std::io::Error) -> Self {
|
||||
ImgError::with_kind(ErrorKind::IOError(err.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
#[error("{kind}")]
|
||||
pub struct PaperoniError {
|
||||
article_source: Option<String>,
|
||||
kind: ErrorKind,
|
||||
}
|
||||
|
||||
impl PaperoniError {
|
||||
pub fn with_kind(kind: ErrorKind) -> Self {
|
||||
PaperoniError {
|
||||
article_source: None,
|
||||
kind,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn kind(&self) -> &ErrorKind {
|
||||
&self.kind
|
||||
}
|
||||
|
||||
pub fn article_source(&self) -> &Option<String> {
|
||||
&self.article_source
|
||||
}
|
||||
|
||||
pub fn set_article_source(&mut self, article_source: &str) {
|
||||
self.article_source = Some(article_source.to_owned());
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ErrorKind> for PaperoniError {
|
||||
fn from(kind: ErrorKind) -> Self {
|
||||
PaperoniError::with_kind(kind)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<epub_builder::Error> for PaperoniError {
|
||||
fn from(err: epub_builder::Error) -> Self {
|
||||
PaperoniError::with_kind(ErrorKind::EpubError(err.description().to_owned()))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<surf::Error> for PaperoniError {
|
||||
fn from(err: surf::Error) -> Self {
|
||||
PaperoniError::with_kind(ErrorKind::HTTPError(err.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<url::ParseError> for PaperoniError {
|
||||
fn from(err: url::ParseError) -> Self {
|
||||
PaperoniError::with_kind(ErrorKind::HTTPError(err.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for PaperoniError {
|
||||
fn from(err: std::io::Error) -> Self {
|
||||
PaperoniError::with_kind(ErrorKind::IOError(err.to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::str::Utf8Error> for PaperoniError {
|
||||
fn from(err: std::str::Utf8Error) -> Self {
|
||||
PaperoniError::with_kind(ErrorKind::UTF8Error(err.to_string()))
|
||||
}
|
||||
}
|
|

@@ -2,6 +2,7 @@ use std::collections::HashMap;

use kuchiki::{traits::*, NodeRef};

use crate::errors::PaperoniError;
use crate::moz_readability::{MetaData, Readability};

pub type ResourceInfo = (String, Option<String>);

@@ -14,22 +15,24 @@ pub struct Extractor {
    article: Option<NodeRef>,
    pub img_urls: Vec<ResourceInfo>,
    readability: Readability,
    pub url: String,
}

impl Extractor {
    /// Create a new instance of an HTML extractor given an HTML string
    pub fn from_html(html_str: &str) -> Self {
    pub fn from_html(html_str: &str, url: &str) -> Self {
        Extractor {
            article: None,
            img_urls: Vec::new(),
            readability: Readability::new(html_str),
            url: url.to_string(),
        }
    }

    /// Locates and extracts the HTML in a document which is determined to be
    /// the source of the content
    pub fn extract_content(&mut self, url: &str) {
        self.readability.parse(url);
    pub fn extract_content(&mut self) -> Result<(), PaperoniError> {
        self.readability.parse(&self.url)?;
        if let Some(article_node_ref) = &self.readability.article_node {
            let template = r#"
            <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">

@@ -44,6 +47,7 @@ impl Extractor {
            body.as_node().append(article_node_ref.clone());
            self.article = Some(doc);
        }
        Ok(())
    }

    /// Traverses the DOM tree of the content and retrieves the IMG URLs

@@ -61,8 +65,11 @@ impl Extractor {
        }
    }

    pub fn article(&self) -> Option<&NodeRef> {
        self.article.as_ref()
    /// Returns the extracted article [NodeRef]. It should only be called *AFTER* calling parse
    pub fn article(&self) -> &NodeRef {
        self.article.as_ref().expect(
            "Article node doesn't exist. This may be because the document has not been parsed",
        )
    }

    pub fn metadata(&self) -> &MetaData {

@@ -75,7 +82,7 @@ impl Extractor {
pub fn serialize_to_xhtml<W: std::io::Write>(
    node_ref: &NodeRef,
    mut w: &mut W,
) -> Result<(), Box<dyn std::error::Error>> {
) -> Result<(), PaperoniError> {
    let mut escape_map = HashMap::new();
    escape_map.insert("<", "&lt;");
    escape_map.insert(">", "&gt;");

@@ -96,6 +103,7 @@ pub fn serialize_to_xhtml<W: std::io::Write>(
                let attrs_str = attrs
                    .map
                    .iter()
                    .filter(|(k, _)| &k.local != "\"")
                    .map(|(k, v)| {
                        format!(
                            "{}=\"{}\"",

@@ -156,8 +164,10 @@ mod test {

    #[test]
    fn test_extract_img_urls() {
        let mut extractor = Extractor::from_html(TEST_HTML);
        extractor.extract_content("http://example.com/");
        let mut extractor = Extractor::from_html(TEST_HTML, "http://example.com/");
        extractor
            .extract_content()
            .expect("Article extraction failed unexpectedly");
        extractor.extract_img_urls();

        assert!(extractor.img_urls.len() > 0);
223 src/http.rs
@@ -1,65 +1,90 @@
use async_std::io::prelude::*;
use async_std::{fs::File, stream};
use futures::StreamExt;
use indicatif::ProgressBar;
use log::{debug, info};
use url::Url;

use crate::errors::{ErrorKind, ImgError, PaperoniError};
use crate::extractor::Extractor;

type HTMLResource = (String, String);

pub async fn fetch_url(
    url: &str,
) -> Result<HTMLResource, Box<dyn std::error::Error + Send + Sync>> {
pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
    let client = surf::Client::new();
    println!("Fetching...");
    debug!("Fetching {}", url);

    let mut redirect_count: u8 = 0;
    let base_url = Url::parse(&url)?;
    let mut url = base_url.clone();
    while redirect_count < 5 {
        redirect_count += 1;
        let req = surf::get(&url);
        let mut res = client.send(req).await?;
        if res.status().is_redirection() {
            if let Some(location) = res.header(surf::http::headers::LOCATION) {
                match Url::parse(location.last().as_str()) {
                    Ok(valid_url) => url = valid_url,
                    Err(e) => match e {
                        url::ParseError::RelativeUrlWithoutBase => {
                            url = base_url.join(location.last().as_str())?
    let process_request = async {
        let mut redirect_count: u8 = 0;
        let base_url = Url::parse(&url)?;
        let mut url = base_url.clone();
        while redirect_count < 5 {
            redirect_count += 1;
            let req = surf::get(&url);
            let mut res = client.send(req).await?;
            if res.status().is_redirection() {
                if let Some(location) = res.header(surf::http::headers::LOCATION) {
                    match Url::parse(location.last().as_str()) {
                        Ok(valid_url) => {
                            info!("Redirecting {} to {}", url, valid_url);
                            url = valid_url
                        }
                        e => return Err(e.into()),
                    },
                };
            }
        } else if res.status().is_success() {
            if let Some(mime) = res.content_type() {
                if mime.essence() == "text/html" {
                    return Ok((url.to_string(), res.body_string().await?));
                        Err(e) => match e {
                            url::ParseError::RelativeUrlWithoutBase => {
                                match base_url.join(location.last().as_str()) {
                                    Ok(joined_url) => {
                                        info!("Redirecting {} to {}", url, joined_url);
                                        url = joined_url;
                                    }
                                    Err(e) => return Err(e.into()),
                                }
                            }
                            e => return Err(e.into()),
                        },
                    };
                }
            } else if res.status().is_success() {
                if let Some(mime) = res.content_type() {
                    if mime.essence() == "text/html" {
                        debug!("Successfully fetched {}", url);
                        return Ok((url.to_string(), res.body_string().await?));
                    } else {
                        let msg = format!(
                            "Invalid HTTP response. Received {} instead of text/html",
                            mime.essence()
                        );

                        return Err(ErrorKind::HTTPError(msg).into());
                    }
                } else {
                    return Err(format!(
                        "Invalid HTTP response. Received {} instead of text/html",
                        mime.essence()
                    )
                    .into());
                    return Err(ErrorKind::HTTPError("Unknown HTTP response".to_owned()).into());
                }
            } else {
                return Err("Unknown HTTP response".into());
                let msg = format!("Request failed: HTTP {}", res.status());
                return Err(ErrorKind::HTTPError(msg).into());
            }
        } else {
            return Err(format!("Request failed: HTTP {}", res.status()).into());
        }
    }
    Err("Unable to fetch HTML".into())
        Err(ErrorKind::HTTPError("Unable to fetch HTML".to_owned()).into())
    };

    process_request.await.map_err(|mut error: PaperoniError| {
        error.set_article_source(url);
        error
    })
}

pub async fn download_images(
    extractor: &mut Extractor,
    article_origin: &Url,
) -> async_std::io::Result<()> {
    bar: &ProgressBar,
) -> Result<(), Vec<ImgError>> {
    if extractor.img_urls.len() > 0 {
        println!("Downloading images...");
        debug!(
            "Downloading {} images for {}",
            extractor.img_urls.len(),
            article_origin
        );
    }
    let img_count = extractor.img_urls.len();

    let imgs_req_iter = extractor
        .img_urls

@@ -67,43 +92,73 @@ pub async fn download_images(
        .map(|(url, _)| {
            (
                url,
                surf::Client::new().get(get_absolute_url(&url, article_origin)),
                surf::Client::new()
                    .with(surf::middleware::Redirect::default())
                    .get(get_absolute_url(&url, article_origin)),
            )
        })
        .map(|(url, req)| async move {
            let mut img_response = req.await.expect("Unable to retrieve image");
            let img_content: Vec<u8> = img_response.body_bytes().await.unwrap();
            let img_mime = img_response
                .content_type()
                .map(|mime| mime.essence().to_string());
            let img_ext = img_response
                .content_type()
                .map(|mime| map_mime_subtype_to_ext(mime.subtype()).to_string())
                .unwrap();
        .enumerate()
        .map(|(img_idx, (url, req))| async move {
            bar.set_message(format!("Downloading images [{}/{}]", img_idx + 1, img_count).as_str());
            match req.await {
                Ok(mut img_response) => {
                    let process_response = async {
                        let img_content: Vec<u8> = match img_response.body_bytes().await {
                            Ok(bytes) => bytes,
                            Err(e) => return Err(e.into()),
                        };
                        let img_mime = img_response
                            .content_type()
                            .map(|mime| mime.essence().to_string());
                        let img_ext = match img_response
                            .content_type()
                            .map(|mime| map_mime_subtype_to_ext(mime.subtype()).to_string())
                        {
                            Some(mime_str) => mime_str,
                            None => {
                                return Err(ErrorKind::HTTPError(
                                    "Image has no Content-Type".to_owned(),
                                )
                                .into())
                            }
                        };

            let mut img_path = std::env::temp_dir();
            img_path.push(format!("{}.{}", hash_url(&url), &img_ext));
            let mut img_file = File::create(&img_path)
                .await
                .expect("Unable to create file");
            img_file
                .write_all(&img_content)
                .await
                .expect("Unable to save to file");
                        let mut img_path = std::env::temp_dir();
                        img_path.push(format!("{}.{}", hash_url(&url), &img_ext));
                        let mut img_file = match File::create(&img_path).await {
                            Ok(file) => file,
                            Err(e) => return Err(e.into()),
                        };
                        match img_file.write_all(&img_content).await {
                            Ok(_) => (),
                            Err(e) => return Err(e.into()),
                        }

            (
                url,
                img_path
                    .file_name()
                    .map(|os_str_name| {
                        os_str_name
                            .to_str()
                            .expect("Unable to get image file name")
                            .to_string()
                        Ok((
                            url,
                            img_path
                                .file_name()
                                .map(|os_str_name| {
                                    os_str_name
                                        .to_str()
                                        .expect("Unable to get image file name")
                                        .to_string()
                                })
                                .unwrap(),
                            img_mime,
                        ))
                    };
                    process_response.await.map_err(|mut e: ImgError| {
                        e.set_url(url);
                        e
                    })
                    .unwrap(),
                img_mime,
            )
                }
                Err(e) => {
                    let mut img_err: ImgError = e.into();
                    img_err.set_url(url);
                    Err(img_err)
                }
            }
        });

    // A utility closure used when updating the value of an image source after downloading is successful

@@ -112,8 +167,6 @@ pub async fn download_images(
        let (img_url, img_path, img_mime) = img_item;
        let img_ref = extractor
            .article()
            .as_mut()
            .expect("Unable to get mutable ref")
            .select_first(&format!("img[src='{}']", img_url))
            .expect("Image node does not exist");
        let mut img_node = img_ref.attributes.borrow_mut();

@@ -124,14 +177,24 @@ pub async fn download_images(
        (img_path, img_mime)
    };

    extractor.img_urls = stream::from_iter(imgs_req_iter)
    let imgs_req_iter = stream::from_iter(imgs_req_iter)
        .buffered(10)
        .collect::<Vec<_>>()
        .await
        .into_iter()
        .map(replace_existing_img_src)
        .collect();
    Ok(())
        .collect::<Vec<Result<_, ImgError>>>()
        .await;
    let mut errors = Vec::new();
    let mut replaced_imgs = Vec::new();
    for img_req_result in imgs_req_iter {
        match img_req_result {
            Ok(img_req) => replaced_imgs.push(replace_existing_img_src(img_req)),
            Err(e) => errors.push(e),
        }
    }
    extractor.img_urls = replaced_imgs;
    if errors.is_empty() {
        Ok(())
    } else {
        Err(errors)
    }
}

/// Handles getting the extension from a given MIME subtype.
260 src/logs.rs Normal file
@@ -0,0 +1,260 @@
use colored::*;
use comfy_table::presets::UTF8_HORIZONTAL_BORDERS_ONLY;
use comfy_table::{Cell, CellAlignment, ContentArrangement, Table};
use directories::UserDirs;
use flexi_logger::LogSpecBuilder;
use log::error;

use crate::{cli::AppConfig, errors::PaperoniError};

pub fn display_summary(
    initial_article_count: usize,
    succesful_articles_table: Table,
    partial_downloads_count: usize,
    errors: Vec<PaperoniError>,
) {
    let successfully_downloaded_count =
        initial_article_count - partial_downloads_count - errors.len();

    println!(
        "{}",
        short_summary(DownloadCount::new(
            initial_article_count,
            successfully_downloaded_count,
            partial_downloads_count,
            errors.len()
        ))
        .bold()
    );

    if successfully_downloaded_count > 0 {
        println!("{}", succesful_articles_table);
    }
    if !errors.is_empty() {
        println!("\n{}", "Failed article downloads".bright_red().bold());
        let mut table_failed = Table::new();
        table_failed
            .load_preset(UTF8_HORIZONTAL_BORDERS_ONLY)
            .set_header(vec![
                Cell::new("Link").set_alignment(CellAlignment::Center),
                Cell::new("Reason").set_alignment(CellAlignment::Center),
            ])
            .set_content_arrangement(ContentArrangement::Dynamic);

        for error in errors {
            let error_source = error
                .article_source()
                .clone()
                .unwrap_or_else(|| "<unknown link>".to_string());
            table_failed.add_row(vec![&error_source, &format!("{}", error.kind())]);
            error!("{}\n - {}", error, error_source);
        }
        println!("{}", table_failed);
    }
}

/// Returns a string summary of the total number of failed and successful article downloads
fn short_summary(download_count: DownloadCount) -> String {
    // TODO: Refactor this
    if download_count.total
        != download_count.successful + download_count.failed + download_count.partial
    {
        panic!("initial_count must be equal to the sum of failed and successful count")
    }
    let get_noun = |count: usize| if count == 1 { "article" } else { "articles" };
    if download_count.successful == download_count.total && download_count.successful == 1 {
        "Article downloaded successfully".green().to_string()
    } else if download_count.total == download_count.failed && download_count.failed == 1 {
        "Article failed to download".red().to_string()
    } else if download_count.total == download_count.partial && download_count.partial == 1 {
        "Article partially failed to download".yellow().to_string()
    } else if download_count.successful == download_count.total {
        "All articles downloaded successfully".green().to_string()
    } else if download_count.failed == download_count.total {
        "All articles failed to download".red().to_string()
    } else if download_count.partial == download_count.total {
        "All articles partially failed to download"
            .yellow()
            .to_string()
    } else if download_count.partial == 0 {
        format!(
            "{} {} downloaded successfully, {} {} failed",
            download_count.successful,
            get_noun(download_count.successful),
            download_count.failed,
            get_noun(download_count.failed)
        )
        .yellow()
        .to_string()
    } else if download_count.successful == 0
        && download_count.partial > 0
        && download_count.failed > 0
    {
        format!(
            "{} {} partially failed to download, {} {} failed",
            download_count.partial,
            get_noun(download_count.partial),
            download_count.failed,
            get_noun(download_count.failed)
        )
        .yellow()
        .to_string()
    } else if download_count.failed == 0
        && download_count.successful > 0
        && download_count.partial > 0
    {
        format!(
            "{} {} downloaded successfully, {} {} partially failed to download",
            download_count.successful,
            get_noun(download_count.successful),
            download_count.partial,
            get_noun(download_count.partial)
        )
        .yellow()
        .to_string()
    } else {
        format!(
            "{} {} downloaded successfully, {} {} partially failed to download, {} {} failed",
            download_count.successful,
            get_noun(download_count.successful),
            download_count.partial,
            get_noun(download_count.partial),
            download_count.failed,
            get_noun(download_count.failed)
        )
        .yellow()
        .to_string()
    }
}

struct DownloadCount {
    total: usize,
    successful: usize,
    partial: usize,
    failed: usize,
}
impl DownloadCount {
    fn new(total: usize, successful: usize, partial: usize, failed: usize) -> Self {
        Self {
            total,
            successful,
            partial,
            failed,
        }
    }
}

pub fn init_logger(app_config: &AppConfig) {
    match UserDirs::new() {
        Some(user_dirs) => {
            let home_dir = user_dirs.home_dir();
            let paperoni_dir = home_dir.join(".paperoni");
            let log_dir = paperoni_dir.join("logs");

            let log_spec = LogSpecBuilder::new()
                .module("paperoni", app_config.log_level())
                .build();
            let formatted_timestamp = app_config.start_time().format("%Y-%m-%d_%H-%M-%S");
            let mut logger = flexi_logger::Logger::with(log_spec);

            if app_config.is_logging_to_file() && (!paperoni_dir.is_dir() || !log_dir.is_dir()) {
                match std::fs::create_dir_all(&log_dir) {
                    Ok(_) => (),
                    Err(e) => {
                        eprintln!("Unable to create paperoni directories on home directory for logging purposes\n{}", e);
                        std::process::exit(1);
                    }
                };
            }

            if app_config.is_logging_to_file() {
                logger = logger
                    .directory(log_dir)
                    .discriminant(formatted_timestamp.to_string())
                    .suppress_timestamp()
                    .log_to_file();
            }

            match logger.start() {
                Ok(_) => (),
                Err(e) => eprintln!("Unable to start logger!\n{}", e),
            }
        }
        None => eprintln!("Unable to get user directories for logging purposes"),
    };
}

#[cfg(test)]
mod tests {
    use super::{short_summary, DownloadCount};
    use colored::*;
    #[test]
    fn test_short_summary() {
        assert_eq!(
            short_summary(DownloadCount::new(1, 1, 0, 0)),
            "Article downloaded successfully".green().to_string()
        );
        assert_eq!(
            short_summary(DownloadCount::new(1, 0, 0, 1)),
            "Article failed to download".red().to_string()
        );
        assert_eq!(
            short_summary(DownloadCount::new(10, 10, 0, 0)),
            "All articles downloaded successfully".green().to_string()
        );
        assert_eq!(
            short_summary(DownloadCount::new(10, 0, 0, 10)),
            "All articles failed to download".red().to_string()
        );
        assert_eq!(
            short_summary(DownloadCount::new(10, 8, 0, 2)),
            "8 articles downloaded successfully, 2 articles failed"
                .yellow()
                .to_string()
        );
        assert_eq!(
            short_summary(DownloadCount::new(10, 1, 0, 9)),
            "1 article downloaded successfully, 9 articles failed"
                .yellow()
                .to_string()
        );
        assert_eq!(
            short_summary(DownloadCount::new(7, 6, 0, 1)),
            "6 articles downloaded successfully, 1 article failed"
                .yellow()
                .to_string()
        );
        assert_eq!(
            short_summary(DownloadCount::new(7, 4, 2, 1)),
            "4 articles downloaded successfully, 2 articles partially failed to download, 1 article failed"
                .yellow()
                .to_string()
        );
        assert_eq!(
            short_summary(DownloadCount::new(12, 6, 6, 0)),
            "6 articles downloaded successfully, 6 articles partially failed to download"
                .yellow()
                .to_string()
        );
        assert_eq!(
            short_summary(DownloadCount::new(5, 0, 4, 1)),
            "4 articles partially failed to download, 1 article failed"
                .yellow()
                .to_string()
        );
        assert_eq!(
            short_summary(DownloadCount::new(4, 0, 4, 0)),
            "All articles partially failed to download"
                .yellow()
                .to_string()
        );
    }

    #[test]
    #[should_panic(
        expected = "initial_count must be equal to the sum of failed and successful count"
    )]
    fn test_short_summary_panics_on_invalid_input() {
        short_summary(DownloadCount::new(0, 12, 0, 43));
    }
}
98 src/main.rs
@@ -3,21 +3,28 @@ extern crate lazy_static;

use async_std::stream;
use async_std::task;
use comfy_table::presets::{UTF8_FULL, UTF8_HORIZONTAL_BORDERS_ONLY};
use comfy_table::{ContentArrangement, Table};
use futures::stream::StreamExt;
use indicatif::{ProgressBar, ProgressStyle};
use log::{debug, warn};
use url::Url;

mod cli;
mod epub;
mod errors;
mod extractor;
/// This module is responsible for async HTTP calls for downloading
/// the HTML content and images
mod http;
mod logs;
mod moz_readability;

use cli::AppConfig;
use epub::generate_epubs;
use extractor::Extractor;
use http::{download_images, fetch_url};
use http::{download_images, fetch_html};
use logs::display_summary;

fn main() {
    let app_config = cli::cli_init();

@@ -28,29 +35,92 @@ fn main() {
}

fn download(app_config: AppConfig) {
    let mut errors = Vec::new();
    let mut partial_download_count: usize = 0;
    let bar = if app_config.can_disable_progress_bar() {
        ProgressBar::hidden()
    } else {
        let enabled_bar = ProgressBar::new(app_config.urls().len() as u64);
        let style = ProgressStyle::default_bar().template(
            "{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} link {pos}/{len:7} {msg:.yellow/white}",
        );
        enabled_bar.set_style(style);
        enabled_bar.enable_steady_tick(500);
        enabled_bar
    };
    let articles = task::block_on(async {
        let urls_iter = app_config.urls().iter().map(|url| fetch_url(url));
        let urls_iter = app_config.urls().iter().map(|url| fetch_html(url));
        let mut responses = stream::from_iter(urls_iter).buffered(app_config.max_conn());
        let mut articles = Vec::new();
        while let Some(fetch_result) = responses.next().await {
            match fetch_result {
                Ok((url, html)) => {
                    println!("Extracting");
                    let mut extractor = Extractor::from_html(&html);
                    extractor.extract_content(&url);

                    if extractor.article().is_some() {
                        extractor.extract_img_urls();
                        download_images(&mut extractor, &Url::parse(&url).unwrap())
                            .await
                            .expect("Unable to download images");
                        articles.push(extractor);
                    debug!("Extracting {}", &url);
                    let mut extractor = Extractor::from_html(&html, &url);
                    bar.set_message("Extracting...");
                    match extractor.extract_content() {
                        Ok(_) => {
                            extractor.extract_img_urls();
                            if let Err(img_errors) =
                                download_images(&mut extractor, &Url::parse(&url).unwrap(), &bar)
                                    .await
                            {
                                partial_download_count += 1;
                                warn!(
                                    "{} image{} failed to download for {}",
                                    img_errors.len(),
                                    if img_errors.len() > 1 { "s" } else { "" },
                                    url
                                );
                                for img_error in img_errors {
                                    warn!(
                                        "{}\n\t\tReason {}",
                                        img_error.url().as_ref().unwrap(),
                                        img_error
                                    );
                                }
                            }
                            articles.push(extractor);
                        }
                        Err(mut e) => {
                            e.set_article_source(&url);
                            errors.push(e);
                        }
                    }
                }
                Err(e) => eprintln!("{}", e),
                Err(e) => errors.push(e),
            }
            bar.inc(1);
        }
        articles
    });
    generate_epubs(articles, app_config.merged());
    bar.finish_with_message("Downloaded articles");

    let mut succesful_articles_table = Table::new();
    succesful_articles_table
        .load_preset(UTF8_FULL)
        .load_preset(UTF8_HORIZONTAL_BORDERS_ONLY)
        .set_content_arrangement(ContentArrangement::Dynamic);
    match generate_epubs(articles, &app_config, &mut succesful_articles_table) {
        Ok(_) => (),
        Err(gen_epub_errors) => {
            errors.extend(gen_epub_errors);
        }
    };
    let has_errors = !errors.is_empty();
    display_summary(
        app_config.urls().len(),
        succesful_articles_table,
        partial_download_count,
        errors,
    );
    if app_config.is_logging_to_file() {
        println!(
            "Log written to paperoni_{}.log\n",
            app_config.start_time().format("%Y-%m-%d_%H-%M-%S")
        );
    }
    if has_errors {
        std::process::exit(1);
    }
}

@@ -7,8 +7,11 @@ use kuchiki::{
    traits::*,
    NodeData, NodeRef,
};
use log::info;
use url::Url;

use crate::errors::{ErrorKind, PaperoniError};

const DEFAULT_CHAR_THRESHOLD: usize = 500;
const FLAG_STRIP_UNLIKELYS: u32 = 0x1;
const FLAG_WEIGHT_CLASSES: u32 = 0x2;

@@ -76,14 +79,15 @@ impl Readability {
            metadata: MetaData::new(),
        }
    }
    pub fn parse(&mut self, url: &str) {
    pub fn parse(&mut self, url: &str) -> Result<(), PaperoniError> {
        self.unwrap_no_script_tags();
        self.remove_scripts();
        self.prep_document();
        self.metadata = self.get_article_metadata();
        self.article_title = self.metadata.title.clone();
        self.grab_article();
        self.grab_article()?;
        self.post_process_content(url);
        Ok(())
    }

    /// Recursively check if node is image, or if node contains exactly only one image

@@ -426,8 +430,7 @@ impl Readability {
        let mut matches = None;
        if let Some(property) = node_attr.get("property") {
            matches = regexes::PROPERTY_REGEX.captures(property);
            if matches.is_some() {
                let captures = matches.as_ref().unwrap();
            if let Some(captures) = &matches {
                for capture in captures.iter() {
                    let mut name = capture.unwrap().as_str().to_lowercase();
                    name = regexes::REPLACE_WHITESPACE_REGEX

@@ -561,7 +564,7 @@ impl Readability {
            .root_node
            .select_first("title")
            .map(|title| title.text_contents().trim().to_string())
            .expect("This file has no <title> tag to extract a title from");
            .unwrap_or("".to_string());
        let orig_title = cur_title.clone();
        let mut title_had_hierarchical_separators = false;
        let word_count = |s: &str| -> usize { s.split_whitespace().count() };

@@ -595,8 +598,8 @@ impl Readability {
            }
        } else if cur_title.len() > 150 || cur_title.len() < 15 {
            let mut h1_nodes = self.root_node.select("h1").unwrap();
            let (_, h1_count) = h1_nodes.size_hint();
            if Some(1) == h1_count {
            let h1_count = self.root_node.select("h1").unwrap().count();
            if h1_count == 1 {
                cur_title = Self::get_inner_text(h1_nodes.next().unwrap().as_node(), None);
            }
        }

@@ -799,6 +802,7 @@ impl Readability {
                    state = State::ReadProp;
                    decl.1 = Some(token.trim().to_string());
                    tokens.push(decl.clone());
                    decl = (None, None);
                    token.clear();
                } else {
                    token.push(c);

@@ -819,11 +823,18 @@ impl Readability {
            }
        }
        if !token.is_empty() {
            decl.1 = Some(token.trim().to_string());
            tokens.push(decl);
            match state {
                State::ReadVal => {
                    decl.1 = Some(token.trim().to_string());
                    tokens.push(decl);
                }
                _ => (),
            }
        }

        tokens
            .into_iter()
            .filter(|tok_pair| tok_pair.0.is_some() && tok_pair.1.is_some())
            .map(|tok_pair| (tok_pair.0.unwrap(), tok_pair.1.unwrap()))
            .collect()
    }

@@ -1576,16 +1587,14 @@ impl Readability {

    /// Using a variety of metrics (content score, classname, element types), find the content that is most likely to be the stuff
    /// a user wants to read. Then return it wrapped up in a div.
    fn grab_article(&mut self) {
        println!("Grabbing article");
    fn grab_article(&mut self) -> Result<(), PaperoniError> {
        info!("Grabbing article {:?}", self.metadata.title);
        // var doc = this._doc;
        // var isPaging = (page !== null ? true: false);
        // page = page ? page : this._doc.body;
        let page = self.root_node.select_first("body");
        if page.is_err() {
            // TODO:Have error logging for this
            println!("Document has no <body>");
            return;
            return Err(ErrorKind::ReadabilityError("Document has no <body>".into()).into());
        }
        let page = page.unwrap();
        let mut attempts: Vec<ExtractAttempt> = Vec::new();

@@ -2075,8 +2084,10 @@ impl Readability {
                attempts.push(ExtractAttempt::new(article_content.clone(), text_length));
                attempts.sort_by(|a, b| b.length.partial_cmp(&a.length).unwrap());
                if attempts.first().as_ref().unwrap().length == 0 {
                    println!("Unable to extract content");
                    break;
                    return Err(ErrorKind::ReadabilityError(
                        "Unable to extract content".into(),
                    )
                    .into());
                }
                article_content = attempts[0].article.clone();
                parse_successful = true;

@@ -2102,7 +2113,8 @@ impl Readability {
                    false
                });
                self.article_node = Some(article_content);
                return;
                info!("Successfully grabbed article {:?}", self.metadata.title);
                return Ok(());
            }
        }
    }

@@ -2460,12 +2472,24 @@ mod test {
        css_map.insert("align-items".to_string(), "center".to_string());
        css_map.insert("border".to_string(), "2px solid black".to_string());

        let css_str_to_vec = Readability::inline_css_str_to_map(css_str);
        assert_eq!(css_map, css_str_to_vec);
        let css_str_to_map = Readability::inline_css_str_to_map(css_str);
        assert_eq!(css_map, css_str_to_map);
        let mut css_map = HashMap::new();
        css_map.insert("color".to_string(), "red".to_string());
        css_map.insert("background-image".to_string(), "url('data:image/jpeg;base64,/wgARCAALABQDASIAAhEBAxEB/8QAFwABAQEBAAAAAAAAAAAAAAAAAgADBP/')".to_string());
        assert_eq!(css_map, Readability::inline_css_str_to_map("color: red;background-image: url('data:image/jpeg;base64,/wgARCAALABQDASIAAhEBAxEB/8QAFwABAQEBAAAAAAAAAAAAAAAAAgADBP/')"));

        let empty_map = HashMap::new();
        assert_eq!(empty_map, Readability::inline_css_str_to_map(" \n \t \r"));
        assert_eq!(empty_map, Readability::inline_css_str_to_map("color"));

        let mut css_map = HashMap::new();
        css_map.insert("color".to_string(), "red".to_string());
        css_map.insert("height".to_string(), "300px".to_string());
        assert_eq!(
            css_map,
            Readability::inline_css_str_to_map("color: red;height: 300px;width")
        );
    }

    #[test]