fix: fix ordering issue with merged articles
This commit adds the itertools crate, which is used to deduplicate the Vec of URLs before downloading.
fix: fix error message
feat: change the serif and mono font declarations
This commit is contained in:
parent
4247fab1ea
commit
282d229754
7 changed files with 49 additions and 30 deletions
16
Cargo.lock
generated
16
Cargo.lock
generated
|
@ -758,6 +758,12 @@ dependencies = [
|
|||
"dtoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
||||
|
||||
[[package]]
|
||||
name = "encode_unicode"
|
||||
version = "0.3.6"
|
||||
|
@ -1247,6 +1253,15 @@ dependencies = [
|
|||
"waker-fn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.7"
|
||||
|
@ -1550,6 +1565,7 @@ dependencies = [
|
|||
"futures",
|
||||
"html5ever",
|
||||
"indicatif",
|
||||
"itertools",
|
||||
"kuchiki",
|
||||
"lazy_static",
|
||||
"log 0.4.14",
|
||||
|
|
|
@ -25,6 +25,7 @@ flexi_logger = "0.17.1"
|
|||
futures = "0.3.15"
|
||||
html5ever = "0.25.1"
|
||||
indicatif = "0.16.2"
|
||||
itertools = "0.10.1"
|
||||
kuchiki = "0.8.1"
|
||||
lazy_static = "1.4.0"
|
||||
log = "0.4.14"
|
||||
|
|
|
@ -118,10 +118,6 @@ into a single epub using the `merge` flag and specifying the output file.
|
|||
paperoni -f links.txt --merge out.epub
|
||||
```
|
||||
|
||||
### Recommended fonts
|
||||
|
||||
The styling on the EPUB files comes from the [writ.css](https://github.com/causal-agent/writ) library. This uses Palatino as the serif font which you can get online for free. However, you can use whichever serif fonts you have installed.
|
||||
|
||||
### Logging events
|
||||
|
||||
Logging is disabled by default. It can be activated by using either the `-v` flag or the `--log-to-file` flag. If the `--log-to-file` flag is passed, the logs are sent to a file in the default Paperoni directory `.paperoni/logs`, which is in your home directory. The `-v` flag configures the verbosity levels such that:
|
||||
|
|
2
src/assets/writ.min.css
vendored
2
src/assets/writ.min.css
vendored
|
@ -4,4 +4,4 @@
|
|||
* Copyright © 2015, Curtis McEnroe <curtis@cmcenroe.me>
|
||||
*
|
||||
* https://cmcenroe.me/writ/LICENSE (ISC)
|
||||
*/dd,hr,ol ol,ol ul,ul ol,ul ul{margin:0}pre,table{overflow-x:auto}a,ins{text-decoration:none}html{font-family:Palatino,Georgia,Lucida Bright,Book Antiqua,serif;font-size:16px;line-height:1.5rem}code,kbd,pre,samp{font-family:Consolas,Liberation Mono,Menlo,Courier,monospace;font-size:.833rem;color:#111}kbd{font-weight:700}h1,h2,h3,h4,h5,h6,th{font-weight:400}h1{font-size:2.488em}h2{font-size:2.074em}h3{font-size:1.728em}h4{font-size:1.44em}h5{font-size:1.2em}h6{font-size:1em}small{font-size:.833em}h1,h2,h3{line-height:3rem}blockquote,dl,h1,h2,h3,h4,h5,h6,ol,p,pre,table,ul{margin:1.5rem 0 0}pre,table{margin-bottom:-1px}hr{border:none;padding:1.5rem 0 0}table{line-height:calc(1.5rem - 1px);width:100%;border-collapse:collapse}pre{margin-top:calc(1.5rem - 1px)}body{color:#222;margin:1.5rem 1ch}a,a code,header nav a:visited{color:#00e}a:visited,a:visited code{color:#60b}mark{color:inherit;background-color:#fe0}code,pre,samp,tfoot,thead{background-color:rgba(0,0,0,.05)}blockquote,ins,main aside{border:rgba(0,0,0,.05) solid}blockquote,main aside{border-width:0 0 0 .5ch}code,pre,samp{border:rgba(0,0,0,.1) solid}td,th{border:solid #dbdbdb}body>header{text-align:center}body>footer,main{display:block;max-width:78ch;margin:auto}main aside,main figure{float:right;margin:1.5rem 0 0 1ch}main aside{max-width:26ch;padding:0 0 0 .5ch}blockquote{margin-right:3ch;margin-left:1.5ch;padding:0 0 0 1ch}pre{border-width:1px;border-radius:2px;padding:0 .5ch}pre code{border:none;padding:0;background-color:transparent;white-space:inherit}code,ins,samp,td,th{border-width:1px}img{max-width:100%}dd,ol,ul{padding:0 0 0 3ch}ul>li{list-style-type:disc}li ul>li{list-style-type:circle}li li ul>li{list-style-type:square}ol>li{list-style-type:decimal}li ol>li{list-style-type:lower-roman}li li ol>li{list-style-type:lower-alpha}nav ul{padding:0;list-style-type:none}nav ul li{display:inline;padding-left:1ch;white-space:nowrap}nav ul li:first-child{padding-left:0}ins,mark{padding:1px}td,th{padding:0 
.5ch}sub,sup{font-size:.75em;line-height:1em}code,samp{border-radius:2px;padding:.1em .2em;white-space:nowrap}
|
||||
*/dd,hr,ol ol,ol ul,ul ol,ul ul{margin:0}pre,table{overflow-x:auto}a,ins{text-decoration:none}html{font-family:Georgia,Lucida Bright,Book Antiqua,serif;font-size:16px;line-height:1.5rem}code,kbd,pre,samp{font-family:Fira Code,Liberation Mono,Menlo,Courier,monospace;font-size:.833rem;color:#111}kbd{font-weight:700}h1,h2,h3,h4,h5,h6,th{font-weight:400}h1{font-size:2.488em}h2{font-size:2.074em}h3{font-size:1.728em}h4{font-size:1.44em}h5{font-size:1.2em}h6{font-size:1em}small{font-size:.833em}h1,h2,h3{line-height:3rem}blockquote,dl,h1,h2,h3,h4,h5,h6,ol,p,pre,table,ul{margin:1.5rem 0 0}pre,table{margin-bottom:-1px}hr{border:none;padding:1.5rem 0 0}table{line-height:calc(1.5rem - 1px);width:100%;border-collapse:collapse}pre{margin-top:calc(1.5rem - 1px)}body{color:#222;margin:1.5rem 1ch}a,a code,header nav a:visited{color:#00e}a:visited,a:visited code{color:#60b}mark{color:inherit;background-color:#fe0}code,pre,samp,tfoot,thead{background-color:rgba(0,0,0,.05)}blockquote,ins,main aside{border:rgba(0,0,0,.05) solid}blockquote,main aside{border-width:0 0 0 .5ch}code,pre,samp{border:rgba(0,0,0,.1) solid}td,th{border:solid #dbdbdb}body>header{text-align:center}body>footer,main{display:block;max-width:78ch;margin:auto}main aside,main figure{float:right;margin:1.5rem 0 0 1ch}main aside{max-width:26ch;padding:0 0 0 .5ch}blockquote{margin-right:3ch;margin-left:1.5ch;padding:0 0 0 1ch}pre{border-width:1px;border-radius:2px;padding:0 .5ch}pre code{border:none;padding:0;background-color:transparent;white-space:inherit}code,ins,samp,td,th{border-width:1px}img{max-width:100%}dd,ol,ul{padding:0 0 0 3ch}ul>li{list-style-type:disc}li ul>li{list-style-type:circle}li li ul>li{list-style-type:square}ol>li{list-style-type:decimal}li ol>li{list-style-type:lower-roman}li li ol>li{list-style-type:lower-alpha}nav ul{padding:0;list-style-type:none}nav ul li{display:inline;padding-left:1ch;white-space:nowrap}nav ul li:first-child{padding-left:0}ins,mark{padding:1px}td,th{padding:0 
.5ch}sub,sup{font-size:.75em;line-height:1em}code,samp{border-radius:2px;padding:.1em .2em;white-space:nowrap}
|
||||
|
|
31
src/cli.rs
31
src/cli.rs
|
@ -1,8 +1,9 @@
|
|||
use std::{collections::BTreeSet, fs, num::NonZeroUsize, path::Path};
|
||||
use std::{fs, num::NonZeroUsize, path::Path};
|
||||
|
||||
use chrono::{DateTime, Local};
|
||||
use clap::{App, AppSettings, Arg, ArgMatches};
|
||||
use flexi_logger::LevelFilter as LogLevel;
|
||||
use itertools::Itertools;
|
||||
|
||||
type Error = crate::errors::CliError<AppConfigBuilderError>;
|
||||
|
||||
|
@ -126,24 +127,24 @@ impl<'a> TryFrom<ArgMatches<'a>> for AppConfig {
|
|||
};
|
||||
let direct_urls = arg_matches
|
||||
.values_of("urls")
|
||||
.and_then(|urls| urls.map(url_filter).collect::<Option<BTreeSet<_>>>());
|
||||
.and_then(|urls| urls.map(url_filter).collect::<Option<Vec<_>>>())
|
||||
.unwrap_or(Vec::new());
|
||||
let file_urls = arg_matches
|
||||
.value_of("file")
|
||||
.map(fs::read_to_string)
|
||||
.transpose()?
|
||||
.and_then(|content| {
|
||||
content
|
||||
.lines()
|
||||
.map(url_filter)
|
||||
.collect::<Option<BTreeSet<_>>>()
|
||||
});
|
||||
match (direct_urls, file_urls) {
|
||||
(Some(direct_urls), Some(file_urls)) => Ok(direct_urls
|
||||
.union(&file_urls)
|
||||
.map(ToOwned::to_owned)
|
||||
.collect::<Vec<_>>()),
|
||||
(Some(urls), None) | (None, Some(urls)) => Ok(urls.into_iter().collect()),
|
||||
(None, None) => Err(Error::NoUrls),
|
||||
.and_then(|content| content.lines().map(url_filter).collect::<Option<Vec<_>>>())
|
||||
.unwrap_or(Vec::new());
|
||||
|
||||
let urls = [direct_urls, file_urls]
|
||||
.concat()
|
||||
.into_iter()
|
||||
.unique()
|
||||
.collect_vec();
|
||||
if !urls.is_empty() {
|
||||
Ok(urls)
|
||||
} else {
|
||||
Err(Error::NoUrls)
|
||||
}
|
||||
}?)
|
||||
.max_conn(match arg_matches.value_of("max-conn") {
|
||||
|
|
|
@ -152,7 +152,7 @@ pub enum CliError<BuilderError: Debug + Display> {
|
|||
InvalidOutputPath(String),
|
||||
#[error("Wrong output directory")]
|
||||
WrongOutputDirectory,
|
||||
#[error("Output directory not exists")]
|
||||
#[error("Output directory does not exist")]
|
||||
OutputDirectoryNotExists,
|
||||
#[error("Unable to start logger!\n{0}")]
|
||||
LogError(#[from] LogError),
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use itertools::Itertools;
|
||||
use kuchiki::{traits::*, NodeRef};
|
||||
|
||||
use crate::errors::PaperoniError;
|
||||
|
@ -54,15 +55,19 @@ impl Extractor {
|
|||
/// Traverses the DOM tree of the content and retrieves the IMG URLs
|
||||
pub fn extract_img_urls(&mut self) {
|
||||
if let Some(content_ref) = &self.article {
|
||||
for img_ref in content_ref.select("img").unwrap() {
|
||||
img_ref.as_node().as_element().map(|img_elem| {
|
||||
img_elem.attributes.borrow().get("src").map(|img_url| {
|
||||
if !(img_url.is_empty() || img_url.starts_with("data:image")) {
|
||||
self.img_urls.push((img_url.to_string(), None))
|
||||
}
|
||||
self.img_urls = content_ref
|
||||
.select("img")
|
||||
.unwrap()
|
||||
.filter_map(|img_ref| {
|
||||
let attrs = img_ref.attributes.borrow();
|
||||
attrs
|
||||
.get("src")
|
||||
.filter(|val| !(val.is_empty() || val.starts_with("data:image")))
|
||||
.map(ToString::to_string)
|
||||
})
|
||||
});
|
||||
}
|
||||
.unique()
|
||||
.map(|val| (val, None))
|
||||
.collect();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue