Update documentation
This commit is contained in:
parent
c00582ac29
commit
cae9227ab0
4 changed files with 44 additions and 10 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -1482,7 +1482,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "paperoni"
|
name = "paperoni"
|
||||||
version = "0.3.0-alpha1"
|
version = "0.4.0-alpha1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-std",
|
"async-std",
|
||||||
"chrono",
|
"chrono",
|
||||||
|
|
|
@ -3,7 +3,7 @@ description = "A web article downloader"
|
||||||
homepage = "https://github.com/hipstermojo/paperoni"
|
homepage = "https://github.com/hipstermojo/paperoni"
|
||||||
repository = "https://github.com/hipstermojo/paperoni"
|
repository = "https://github.com/hipstermojo/paperoni"
|
||||||
name = "paperoni"
|
name = "paperoni"
|
||||||
version = "0.3.0-alpha1"
|
version = "0.4.0-alpha1"
|
||||||
authors = ["Kenneth Gitere <gitere81@gmail.com>"]
|
authors = ["Kenneth Gitere <gitere81@gmail.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|
43
README.md
43
README.md
|
@ -2,7 +2,8 @@
|
||||||
|
|
||||||
<p align="center"><i>Salami not included</i></p>
|
<p align="center"><i>Salami not included</i></p>
|
||||||
|
|
||||||
Paperoni is a web article downloader written in Rust. The downloaded articles are then exported as EPUB files.
|
![crates.io](https://img.shields.io/crates/v/paperoni.svg)
|
||||||
|
Paperoni is a CLI tool made in Rust for downloading web articles as EPUBs.
|
||||||
|
|
||||||
> This project is in an alpha release so it might crash when you use it. Please open an [issue on GitHub](https://github.com/hipstermojo/paperoni/issues/new) if it does crash.
|
> This project is in an alpha release so it might crash when you use it. Please open an [issue on GitHub](https://github.com/hipstermojo/paperoni/issues/new) if it does crash.
|
||||||
|
|
||||||
|
@ -17,7 +18,7 @@ Check the [releases](https://github.com/hipstermojo/paperoni/releases) page for
|
||||||
Paperoni is published on [crates.io](https://crates.io). If you have [cargo](https://github.com/rust-lang/cargo) installed, then run:
|
Paperoni is published on [crates.io](https://crates.io). If you have [cargo](https://github.com/rust-lang/cargo) installed, then run:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
cargo install paperoni --version 0.3.0-alpha1
|
cargo install paperoni --version 0.4.0-alpha1
|
||||||
```
|
```
|
||||||
|
|
||||||
_Paperoni is still in alpha so the `version` flag has to be passed._
|
_Paperoni is still in alpha so the `version` flag has to be passed._
|
||||||
|
@ -37,6 +38,27 @@ cargo run -- # pass your url here
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
|
```
|
||||||
|
USAGE:
|
||||||
|
paperoni [OPTIONS] [urls]...
|
||||||
|
|
||||||
|
OPTIONS:
|
||||||
|
-f, --file <file> Input file containing links
|
||||||
|
-h, --help Prints help information
|
||||||
|
--log-to-file Enables logging of events to a file located in .paperoni/logs with a default log level
|
||||||
|
of debug. Use -v to specify the logging level
|
||||||
|
--max_conn <max_conn> The maximum number of concurrent HTTP connections when downloading articles. Default is
|
||||||
|
8
|
||||||
|
--merge <output_name> Merge multiple articles into a single epub
|
||||||
|
-V, --version Prints version information
|
||||||
|
-v Enables logging of events and set the verbosity level. Use --help to read on its usage
|
||||||
|
|
||||||
|
ARGS:
|
||||||
|
<urls>... Urls of web articles
|
||||||
|
```
|
||||||
|
|
||||||
|
To download a single article, pass in its URL:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
paperoni https://en.wikipedia.org/wiki/Pepperoni
|
paperoni https://en.wikipedia.org/wiki/Pepperoni
|
||||||
```
|
```
|
||||||
|
@ -68,10 +90,23 @@ into a single epub using the `merge` flag and specifying the output file.
|
||||||
paperoni -f links.txt --merge out.epub
|
paperoni -f links.txt --merge out.epub
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Logging events
|
||||||
|
|
||||||
|
Logging is disabled by default. It can be enabled with either the `-v` flag or the `--log-to-file` flag. If the `--log-to-file` flag is passed, the logs are written to a file in the default Paperoni directory `.paperoni/logs`, which is in your home directory. The `-v` flag configures the verbosity level as follows:
|
||||||
|
|
||||||
|
```
|
||||||
|
-v Logs only the error level
|
||||||
|
-vv Logs only the warn level
|
||||||
|
-vvv Logs only the info level
|
||||||
|
-vvvv Logs only the debug level
|
||||||
|
```
|
||||||
|
|
||||||
|
If only the `-v` flag is passed, the progress bars are disabled. If both `-v` and `--log-to-file` are passed, the progress bars are still shown.
|
||||||
|
|
||||||
## How it works
|
## How it works
|
||||||
|
|
||||||
The URL passed to Paperoni is fetched and the returned HTML response is passed to the extractor.
|
The URL passed to Paperoni is fetched and the returned HTML response is passed to the extractor.
|
||||||
This extractor retrieves a possible article using a port of the [Mozilla Readability algorithm](https://github.com/mozilla/readability). This article is then saved in an EPUB.
|
This extractor retrieves a possible article using a [custom port](https://github.com/hipstermojo/paperoni/blob/master/src/moz_readability/mod.rs) of the [Mozilla Readability algorithm](https://github.com/mozilla/readability). This article is then saved in an EPUB.
|
||||||
|
|
||||||
> The port of the algorithm is still unstable as well so it is not fully compatible with all the websites that can be extracted using Readability.
|
> The port of the algorithm is still unstable as well so it is not fully compatible with all the websites that can be extracted using Readability.
|
||||||
|
|
||||||
|
@ -82,3 +117,5 @@ This program is still in alpha so a number of things won't work:
|
||||||
- Websites that only run with JavaScript cannot be extracted.
|
- Websites that only run with JavaScript cannot be extracted.
|
||||||
- Website articles that cannot be extracted by Readability cannot be extracted by Paperoni either.
|
- Website articles that cannot be extracted by Readability cannot be extracted by Paperoni either.
|
||||||
- Code snippets on Medium articles that are lazy loaded will not appear in the EPUB.
|
- Code snippets on Medium articles that are lazy loaded will not appear in the EPUB.
|
||||||
|
|
||||||
|
There are also kinds of web pages that Paperoni won't work on in general, such as Twitter and Reddit threads.
|
||||||
|
|
|
@ -14,10 +14,7 @@ pub fn cli_init() -> AppConfig {
|
||||||
])
|
])
|
||||||
.version(clap::crate_version!())
|
.version(clap::crate_version!())
|
||||||
.about(
|
.about(
|
||||||
"
|
"Paperoni is a CLI tool made in Rust for downloading web articles as EPUBs",
|
||||||
Paperoni is an article downloader.
|
|
||||||
It takes a url, downloads the article content from it and saves it to an epub.
|
|
||||||
",
|
|
||||||
)
|
)
|
||||||
.arg(
|
.arg(
|
||||||
Arg::with_name("urls")
|
Arg::with_name("urls")
|
||||||
|
@ -47,7 +44,7 @@ It takes a url, downloads the article content from it and saves it to an epub.
|
||||||
Arg::with_name("verbosity")
|
Arg::with_name("verbosity")
|
||||||
.short("v")
|
.short("v")
|
||||||
.multiple(true)
|
.multiple(true)
|
||||||
.help("Enables logging of events and set the verbosity level. Use -h to read on its usage")
|
.help("Enables logging of events and set the verbosity level. Use --help to read on its usage")
|
||||||
.long_help(
|
.long_help(
|
||||||
"This takes upto 4 levels of verbosity in the following order.
|
"This takes upto 4 levels of verbosity in the following order.
|
||||||
- Error (-v)
|
- Error (-v)
|
||||||
|
|
Reference in a new issue