fix: fix ordering issue with merged articles
This commit adds the itertools crate, which is used to dedup the Vec when downloading URLs.
fix: fix error message
feat: change the serif and mono font declarations
This commit is contained in:
parent
4247fab1ea
commit
282d229754
7 changed files with 49 additions and 30 deletions
16
Cargo.lock
generated
16
Cargo.lock
generated
|
@ -758,6 +758,12 @@ dependencies = [
|
||||||
"dtoa",
|
"dtoa",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "encode_unicode"
|
name = "encode_unicode"
|
||||||
version = "0.3.6"
|
version = "0.3.6"
|
||||||
|
@ -1247,6 +1253,15 @@ dependencies = [
|
||||||
"waker-fn",
|
"waker-fn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itertools"
|
||||||
|
version = "0.10.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itoa"
|
name = "itoa"
|
||||||
version = "0.4.7"
|
version = "0.4.7"
|
||||||
|
@ -1550,6 +1565,7 @@ dependencies = [
|
||||||
"futures",
|
"futures",
|
||||||
"html5ever",
|
"html5ever",
|
||||||
"indicatif",
|
"indicatif",
|
||||||
|
"itertools",
|
||||||
"kuchiki",
|
"kuchiki",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"log 0.4.14",
|
"log 0.4.14",
|
||||||
|
|
|
@ -25,6 +25,7 @@ flexi_logger = "0.17.1"
|
||||||
futures = "0.3.15"
|
futures = "0.3.15"
|
||||||
html5ever = "0.25.1"
|
html5ever = "0.25.1"
|
||||||
indicatif = "0.16.2"
|
indicatif = "0.16.2"
|
||||||
|
itertools = "0.10.1"
|
||||||
kuchiki = "0.8.1"
|
kuchiki = "0.8.1"
|
||||||
lazy_static = "1.4.0"
|
lazy_static = "1.4.0"
|
||||||
log = "0.4.14"
|
log = "0.4.14"
|
||||||
|
|
|
@ -118,10 +118,6 @@ into a single epub using the `merge` flag and specifying the output file.
|
||||||
paperoni -f links.txt --merge out.epub
|
paperoni -f links.txt --merge out.epub
|
||||||
```
|
```
|
||||||
|
|
||||||
### Recommended fonts
|
|
||||||
|
|
||||||
The styling on the EPUB files comes from the [writ.css](https://github.com/causal-agent/writ) library. This uses Palatino as the serif font which you can get online for free. However, you can use whichever serif fonts you have installed.
|
|
||||||
|
|
||||||
### Logging events
|
### Logging events
|
||||||
|
|
||||||
Logging is disabled by default. This can be activated by either using the `-v` flag or `--log-to-file` flag. If the `--log-to-file` flag is passed the logs are sent to a file in the default Paperoni directory `.paperoni/logs` which is on your home directory. The `-v` flag configures the verbosity levels such that:
|
Logging is disabled by default. This can be activated by either using the `-v` flag or `--log-to-file` flag. If the `--log-to-file` flag is passed the logs are sent to a file in the default Paperoni directory `.paperoni/logs` which is on your home directory. The `-v` flag configures the verbosity levels such that:
|
||||||
|
|
2
src/assets/writ.min.css
vendored
2
src/assets/writ.min.css
vendored
|
@ -4,4 +4,4 @@
|
||||||
* Copyright © 2015, Curtis McEnroe <curtis@cmcenroe.me>
|
* Copyright © 2015, Curtis McEnroe <curtis@cmcenroe.me>
|
||||||
*
|
*
|
||||||
* https://cmcenroe.me/writ/LICENSE (ISC)
|
* https://cmcenroe.me/writ/LICENSE (ISC)
|
||||||
*/dd,hr,ol ol,ol ul,ul ol,ul ul{margin:0}pre,table{overflow-x:auto}a,ins{text-decoration:none}html{font-family:Palatino,Georgia,Lucida Bright,Book Antiqua,serif;font-size:16px;line-height:1.5rem}code,kbd,pre,samp{font-family:Consolas,Liberation Mono,Menlo,Courier,monospace;font-size:.833rem;color:#111}kbd{font-weight:700}h1,h2,h3,h4,h5,h6,th{font-weight:400}h1{font-size:2.488em}h2{font-size:2.074em}h3{font-size:1.728em}h4{font-size:1.44em}h5{font-size:1.2em}h6{font-size:1em}small{font-size:.833em}h1,h2,h3{line-height:3rem}blockquote,dl,h1,h2,h3,h4,h5,h6,ol,p,pre,table,ul{margin:1.5rem 0 0}pre,table{margin-bottom:-1px}hr{border:none;padding:1.5rem 0 0}table{line-height:calc(1.5rem - 1px);width:100%;border-collapse:collapse}pre{margin-top:calc(1.5rem - 1px)}body{color:#222;margin:1.5rem 1ch}a,a code,header nav a:visited{color:#00e}a:visited,a:visited code{color:#60b}mark{color:inherit;background-color:#fe0}code,pre,samp,tfoot,thead{background-color:rgba(0,0,0,.05)}blockquote,ins,main aside{border:rgba(0,0,0,.05) solid}blockquote,main aside{border-width:0 0 0 .5ch}code,pre,samp{border:rgba(0,0,0,.1) solid}td,th{border:solid #dbdbdb}body>header{text-align:center}body>footer,main{display:block;max-width:78ch;margin:auto}main aside,main figure{float:right;margin:1.5rem 0 0 1ch}main aside{max-width:26ch;padding:0 0 0 .5ch}blockquote{margin-right:3ch;margin-left:1.5ch;padding:0 0 0 1ch}pre{border-width:1px;border-radius:2px;padding:0 .5ch}pre code{border:none;padding:0;background-color:transparent;white-space:inherit}code,ins,samp,td,th{border-width:1px}img{max-width:100%}dd,ol,ul{padding:0 0 0 3ch}ul>li{list-style-type:disc}li ul>li{list-style-type:circle}li li ul>li{list-style-type:square}ol>li{list-style-type:decimal}li ol>li{list-style-type:lower-roman}li li ol>li{list-style-type:lower-alpha}nav ul{padding:0;list-style-type:none}nav ul li{display:inline;padding-left:1ch;white-space:nowrap}nav ul li:first-child{padding-left:0}ins,mark{padding:1px}td,th{padding:0 
.5ch}sub,sup{font-size:.75em;line-height:1em}code,samp{border-radius:2px;padding:.1em .2em;white-space:nowrap}
|
*/dd,hr,ol ol,ol ul,ul ol,ul ul{margin:0}pre,table{overflow-x:auto}a,ins{text-decoration:none}html{font-family:Georgia,Lucida Bright,Book Antiqua,serif;font-size:16px;line-height:1.5rem}code,kbd,pre,samp{font-family:Fira Code,Liberation Mono,Menlo,Courier,monospace;font-size:.833rem;color:#111}kbd{font-weight:700}h1,h2,h3,h4,h5,h6,th{font-weight:400}h1{font-size:2.488em}h2{font-size:2.074em}h3{font-size:1.728em}h4{font-size:1.44em}h5{font-size:1.2em}h6{font-size:1em}small{font-size:.833em}h1,h2,h3{line-height:3rem}blockquote,dl,h1,h2,h3,h4,h5,h6,ol,p,pre,table,ul{margin:1.5rem 0 0}pre,table{margin-bottom:-1px}hr{border:none;padding:1.5rem 0 0}table{line-height:calc(1.5rem - 1px);width:100%;border-collapse:collapse}pre{margin-top:calc(1.5rem - 1px)}body{color:#222;margin:1.5rem 1ch}a,a code,header nav a:visited{color:#00e}a:visited,a:visited code{color:#60b}mark{color:inherit;background-color:#fe0}code,pre,samp,tfoot,thead{background-color:rgba(0,0,0,.05)}blockquote,ins,main aside{border:rgba(0,0,0,.05) solid}blockquote,main aside{border-width:0 0 0 .5ch}code,pre,samp{border:rgba(0,0,0,.1) solid}td,th{border:solid #dbdbdb}body>header{text-align:center}body>footer,main{display:block;max-width:78ch;margin:auto}main aside,main figure{float:right;margin:1.5rem 0 0 1ch}main aside{max-width:26ch;padding:0 0 0 .5ch}blockquote{margin-right:3ch;margin-left:1.5ch;padding:0 0 0 1ch}pre{border-width:1px;border-radius:2px;padding:0 .5ch}pre code{border:none;padding:0;background-color:transparent;white-space:inherit}code,ins,samp,td,th{border-width:1px}img{max-width:100%}dd,ol,ul{padding:0 0 0 3ch}ul>li{list-style-type:disc}li ul>li{list-style-type:circle}li li ul>li{list-style-type:square}ol>li{list-style-type:decimal}li ol>li{list-style-type:lower-roman}li li ol>li{list-style-type:lower-alpha}nav ul{padding:0;list-style-type:none}nav ul li{display:inline;padding-left:1ch;white-space:nowrap}nav ul li:first-child{padding-left:0}ins,mark{padding:1px}td,th{padding:0 
.5ch}sub,sup{font-size:.75em;line-height:1em}code,samp{border-radius:2px;padding:.1em .2em;white-space:nowrap}
|
||||||
|
|
31
src/cli.rs
31
src/cli.rs
|
@ -1,8 +1,9 @@
|
||||||
use std::{collections::BTreeSet, fs, num::NonZeroUsize, path::Path};
|
use std::{fs, num::NonZeroUsize, path::Path};
|
||||||
|
|
||||||
use chrono::{DateTime, Local};
|
use chrono::{DateTime, Local};
|
||||||
use clap::{App, AppSettings, Arg, ArgMatches};
|
use clap::{App, AppSettings, Arg, ArgMatches};
|
||||||
use flexi_logger::LevelFilter as LogLevel;
|
use flexi_logger::LevelFilter as LogLevel;
|
||||||
|
use itertools::Itertools;
|
||||||
|
|
||||||
type Error = crate::errors::CliError<AppConfigBuilderError>;
|
type Error = crate::errors::CliError<AppConfigBuilderError>;
|
||||||
|
|
||||||
|
@ -126,24 +127,24 @@ impl<'a> TryFrom<ArgMatches<'a>> for AppConfig {
|
||||||
};
|
};
|
||||||
let direct_urls = arg_matches
|
let direct_urls = arg_matches
|
||||||
.values_of("urls")
|
.values_of("urls")
|
||||||
.and_then(|urls| urls.map(url_filter).collect::<Option<BTreeSet<_>>>());
|
.and_then(|urls| urls.map(url_filter).collect::<Option<Vec<_>>>())
|
||||||
|
.unwrap_or(Vec::new());
|
||||||
let file_urls = arg_matches
|
let file_urls = arg_matches
|
||||||
.value_of("file")
|
.value_of("file")
|
||||||
.map(fs::read_to_string)
|
.map(fs::read_to_string)
|
||||||
.transpose()?
|
.transpose()?
|
||||||
.and_then(|content| {
|
.and_then(|content| content.lines().map(url_filter).collect::<Option<Vec<_>>>())
|
||||||
content
|
.unwrap_or(Vec::new());
|
||||||
.lines()
|
|
||||||
.map(url_filter)
|
let urls = [direct_urls, file_urls]
|
||||||
.collect::<Option<BTreeSet<_>>>()
|
.concat()
|
||||||
});
|
.into_iter()
|
||||||
match (direct_urls, file_urls) {
|
.unique()
|
||||||
(Some(direct_urls), Some(file_urls)) => Ok(direct_urls
|
.collect_vec();
|
||||||
.union(&file_urls)
|
if !urls.is_empty() {
|
||||||
.map(ToOwned::to_owned)
|
Ok(urls)
|
||||||
.collect::<Vec<_>>()),
|
} else {
|
||||||
(Some(urls), None) | (None, Some(urls)) => Ok(urls.into_iter().collect()),
|
Err(Error::NoUrls)
|
||||||
(None, None) => Err(Error::NoUrls),
|
|
||||||
}
|
}
|
||||||
}?)
|
}?)
|
||||||
.max_conn(match arg_matches.value_of("max-conn") {
|
.max_conn(match arg_matches.value_of("max-conn") {
|
||||||
|
|
|
@ -152,7 +152,7 @@ pub enum CliError<BuilderError: Debug + Display> {
|
||||||
InvalidOutputPath(String),
|
InvalidOutputPath(String),
|
||||||
#[error("Wrong output directory")]
|
#[error("Wrong output directory")]
|
||||||
WrongOutputDirectory,
|
WrongOutputDirectory,
|
||||||
#[error("Output directory not exists")]
|
#[error("Output directory does not exist")]
|
||||||
OutputDirectoryNotExists,
|
OutputDirectoryNotExists,
|
||||||
#[error("Unable to start logger!\n{0}")]
|
#[error("Unable to start logger!\n{0}")]
|
||||||
LogError(#[from] LogError),
|
LogError(#[from] LogError),
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use itertools::Itertools;
|
||||||
use kuchiki::{traits::*, NodeRef};
|
use kuchiki::{traits::*, NodeRef};
|
||||||
|
|
||||||
use crate::errors::PaperoniError;
|
use crate::errors::PaperoniError;
|
||||||
|
@ -54,15 +55,19 @@ impl Extractor {
|
||||||
/// Traverses the DOM tree of the content and retrieves the IMG URLs
|
/// Traverses the DOM tree of the content and retrieves the IMG URLs
|
||||||
pub fn extract_img_urls(&mut self) {
|
pub fn extract_img_urls(&mut self) {
|
||||||
if let Some(content_ref) = &self.article {
|
if let Some(content_ref) = &self.article {
|
||||||
for img_ref in content_ref.select("img").unwrap() {
|
self.img_urls = content_ref
|
||||||
img_ref.as_node().as_element().map(|img_elem| {
|
.select("img")
|
||||||
img_elem.attributes.borrow().get("src").map(|img_url| {
|
.unwrap()
|
||||||
if !(img_url.is_empty() || img_url.starts_with("data:image")) {
|
.filter_map(|img_ref| {
|
||||||
self.img_urls.push((img_url.to_string(), None))
|
let attrs = img_ref.attributes.borrow();
|
||||||
}
|
attrs
|
||||||
})
|
.get("src")
|
||||||
});
|
.filter(|val| !(val.is_empty() || val.starts_with("data:image")))
|
||||||
}
|
.map(ToString::to_string)
|
||||||
|
})
|
||||||
|
.unique()
|
||||||
|
.map(|val| (val, None))
|
||||||
|
.collect();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Reference in a new issue