fix: fix ordering issue with merged articles

This commit adds the itertools crate, which is used to deduplicate the Vec
of URLs to download while preserving the order in which they were given

fix: fix error message
feat: change the serif and mono fonts declarations
This commit is contained in:
Kenneth Gitere 2021-06-10 20:16:31 +03:00
parent 4247fab1ea
commit 282d229754
7 changed files with 49 additions and 30 deletions

16
Cargo.lock generated
View file

@ -758,6 +758,12 @@ dependencies = [
"dtoa",
]
[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "encode_unicode"
version = "0.3.6"
@ -1247,6 +1253,15 @@ dependencies = [
"waker-fn",
]
[[package]]
name = "itertools"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "0.4.7"
@ -1550,6 +1565,7 @@ dependencies = [
"futures",
"html5ever",
"indicatif",
"itertools",
"kuchiki",
"lazy_static",
"log 0.4.14",

View file

@ -25,6 +25,7 @@ flexi_logger = "0.17.1"
futures = "0.3.15"
html5ever = "0.25.1"
indicatif = "0.16.2"
itertools = "0.10.1"
kuchiki = "0.8.1"
lazy_static = "1.4.0"
log = "0.4.14"

View file

@ -118,10 +118,6 @@ into a single epub using the `merge` flag and specifying the output file.
paperoni -f links.txt --merge out.epub
```
### Recommended fonts
The styling on the EPUB files comes from the [writ.css](https://github.com/causal-agent/writ) library. It uses Palatino as the serif font, which you can get online for free. However, you can use whichever serif fonts you have installed.
### Logging events
Logging is disabled by default. It can be activated by using either the `-v` flag or the `--log-to-file` flag. If the `--log-to-file` flag is passed, the logs are sent to a file in the default Paperoni directory `.paperoni/logs`, which is in your home directory. The `-v` flag configures the verbosity levels such that:

View file

@ -4,4 +4,4 @@
* Copyright © 2015, Curtis McEnroe <curtis@cmcenroe.me>
*
* https://cmcenroe.me/writ/LICENSE (ISC)
*/dd,hr,ol ol,ol ul,ul ol,ul ul{margin:0}pre,table{overflow-x:auto}a,ins{text-decoration:none}html{font-family:Palatino,Georgia,Lucida Bright,Book Antiqua,serif;font-size:16px;line-height:1.5rem}code,kbd,pre,samp{font-family:Consolas,Liberation Mono,Menlo,Courier,monospace;font-size:.833rem;color:#111}kbd{font-weight:700}h1,h2,h3,h4,h5,h6,th{font-weight:400}h1{font-size:2.488em}h2{font-size:2.074em}h3{font-size:1.728em}h4{font-size:1.44em}h5{font-size:1.2em}h6{font-size:1em}small{font-size:.833em}h1,h2,h3{line-height:3rem}blockquote,dl,h1,h2,h3,h4,h5,h6,ol,p,pre,table,ul{margin:1.5rem 0 0}pre,table{margin-bottom:-1px}hr{border:none;padding:1.5rem 0 0}table{line-height:calc(1.5rem - 1px);width:100%;border-collapse:collapse}pre{margin-top:calc(1.5rem - 1px)}body{color:#222;margin:1.5rem 1ch}a,a code,header nav a:visited{color:#00e}a:visited,a:visited code{color:#60b}mark{color:inherit;background-color:#fe0}code,pre,samp,tfoot,thead{background-color:rgba(0,0,0,.05)}blockquote,ins,main aside{border:rgba(0,0,0,.05) solid}blockquote,main aside{border-width:0 0 0 .5ch}code,pre,samp{border:rgba(0,0,0,.1) solid}td,th{border:solid #dbdbdb}body>header{text-align:center}body>footer,main{display:block;max-width:78ch;margin:auto}main aside,main figure{float:right;margin:1.5rem 0 0 1ch}main aside{max-width:26ch;padding:0 0 0 .5ch}blockquote{margin-right:3ch;margin-left:1.5ch;padding:0 0 0 1ch}pre{border-width:1px;border-radius:2px;padding:0 .5ch}pre code{border:none;padding:0;background-color:transparent;white-space:inherit}code,ins,samp,td,th{border-width:1px}img{max-width:100%}dd,ol,ul{padding:0 0 0 3ch}ul>li{list-style-type:disc}li ul>li{list-style-type:circle}li li ul>li{list-style-type:square}ol>li{list-style-type:decimal}li ol>li{list-style-type:lower-roman}li li ol>li{list-style-type:lower-alpha}nav ul{padding:0;list-style-type:none}nav ul li{display:inline;padding-left:1ch;white-space:nowrap}nav ul li:first-child{padding-left:0}ins,mark{padding:1px}td,th{padding:0 
.5ch}sub,sup{font-size:.75em;line-height:1em}code,samp{border-radius:2px;padding:.1em .2em;white-space:nowrap}
*/dd,hr,ol ol,ol ul,ul ol,ul ul{margin:0}pre,table{overflow-x:auto}a,ins{text-decoration:none}html{font-family:Georgia,Lucida Bright,Book Antiqua,serif;font-size:16px;line-height:1.5rem}code,kbd,pre,samp{font-family:Fira Code,Liberation Mono,Menlo,Courier,monospace;font-size:.833rem;color:#111}kbd{font-weight:700}h1,h2,h3,h4,h5,h6,th{font-weight:400}h1{font-size:2.488em}h2{font-size:2.074em}h3{font-size:1.728em}h4{font-size:1.44em}h5{font-size:1.2em}h6{font-size:1em}small{font-size:.833em}h1,h2,h3{line-height:3rem}blockquote,dl,h1,h2,h3,h4,h5,h6,ol,p,pre,table,ul{margin:1.5rem 0 0}pre,table{margin-bottom:-1px}hr{border:none;padding:1.5rem 0 0}table{line-height:calc(1.5rem - 1px);width:100%;border-collapse:collapse}pre{margin-top:calc(1.5rem - 1px)}body{color:#222;margin:1.5rem 1ch}a,a code,header nav a:visited{color:#00e}a:visited,a:visited code{color:#60b}mark{color:inherit;background-color:#fe0}code,pre,samp,tfoot,thead{background-color:rgba(0,0,0,.05)}blockquote,ins,main aside{border:rgba(0,0,0,.05) solid}blockquote,main aside{border-width:0 0 0 .5ch}code,pre,samp{border:rgba(0,0,0,.1) solid}td,th{border:solid #dbdbdb}body>header{text-align:center}body>footer,main{display:block;max-width:78ch;margin:auto}main aside,main figure{float:right;margin:1.5rem 0 0 1ch}main aside{max-width:26ch;padding:0 0 0 .5ch}blockquote{margin-right:3ch;margin-left:1.5ch;padding:0 0 0 1ch}pre{border-width:1px;border-radius:2px;padding:0 .5ch}pre code{border:none;padding:0;background-color:transparent;white-space:inherit}code,ins,samp,td,th{border-width:1px}img{max-width:100%}dd,ol,ul{padding:0 0 0 3ch}ul>li{list-style-type:disc}li ul>li{list-style-type:circle}li li ul>li{list-style-type:square}ol>li{list-style-type:decimal}li ol>li{list-style-type:lower-roman}li li ol>li{list-style-type:lower-alpha}nav ul{padding:0;list-style-type:none}nav ul li{display:inline;padding-left:1ch;white-space:nowrap}nav ul li:first-child{padding-left:0}ins,mark{padding:1px}td,th{padding:0 
.5ch}sub,sup{font-size:.75em;line-height:1em}code,samp{border-radius:2px;padding:.1em .2em;white-space:nowrap}

View file

@ -1,8 +1,9 @@
use std::{collections::BTreeSet, fs, num::NonZeroUsize, path::Path};
use std::{fs, num::NonZeroUsize, path::Path};
use chrono::{DateTime, Local};
use clap::{App, AppSettings, Arg, ArgMatches};
use flexi_logger::LevelFilter as LogLevel;
use itertools::Itertools;
type Error = crate::errors::CliError<AppConfigBuilderError>;
@ -126,24 +127,24 @@ impl<'a> TryFrom<ArgMatches<'a>> for AppConfig {
};
let direct_urls = arg_matches
.values_of("urls")
.and_then(|urls| urls.map(url_filter).collect::<Option<BTreeSet<_>>>());
.and_then(|urls| urls.map(url_filter).collect::<Option<Vec<_>>>())
.unwrap_or(Vec::new());
let file_urls = arg_matches
.value_of("file")
.map(fs::read_to_string)
.transpose()?
.and_then(|content| {
content
.lines()
.map(url_filter)
.collect::<Option<BTreeSet<_>>>()
});
match (direct_urls, file_urls) {
(Some(direct_urls), Some(file_urls)) => Ok(direct_urls
.union(&file_urls)
.map(ToOwned::to_owned)
.collect::<Vec<_>>()),
(Some(urls), None) | (None, Some(urls)) => Ok(urls.into_iter().collect()),
(None, None) => Err(Error::NoUrls),
.and_then(|content| content.lines().map(url_filter).collect::<Option<Vec<_>>>())
.unwrap_or(Vec::new());
let urls = [direct_urls, file_urls]
.concat()
.into_iter()
.unique()
.collect_vec();
if !urls.is_empty() {
Ok(urls)
} else {
Err(Error::NoUrls)
}
}?)
.max_conn(match arg_matches.value_of("max-conn") {

View file

@ -152,7 +152,7 @@ pub enum CliError<BuilderError: Debug + Display> {
InvalidOutputPath(String),
#[error("Wrong output directory")]
WrongOutputDirectory,
#[error("Output directory not exists")]
#[error("Output directory does not exist")]
OutputDirectoryNotExists,
#[error("Unable to start logger!\n{0}")]
LogError(#[from] LogError),

View file

@ -1,5 +1,6 @@
use std::collections::HashMap;
use itertools::Itertools;
use kuchiki::{traits::*, NodeRef};
use crate::errors::PaperoniError;
@ -54,15 +55,19 @@ impl Extractor {
/// Traverses the DOM tree of the content and retrieves the IMG URLs
pub fn extract_img_urls(&mut self) {
if let Some(content_ref) = &self.article {
for img_ref in content_ref.select("img").unwrap() {
img_ref.as_node().as_element().map(|img_elem| {
img_elem.attributes.borrow().get("src").map(|img_url| {
if !(img_url.is_empty() || img_url.starts_with("data:image")) {
self.img_urls.push((img_url.to_string(), None))
}
})
});
}
self.img_urls = content_ref
.select("img")
.unwrap()
.filter_map(|img_ref| {
let attrs = img_ref.attributes.borrow();
attrs
.get("src")
.filter(|val| !(val.is_empty() || val.starts_with("data:image")))
.map(ToString::to_string)
})
.unique()
.map(|val| (val, None))
.collect();
}
}