Fix alignment in README

Update manifest file
Add fix to the serialized file so that void tags are self-closing, since
unclosed tags are invalid XHTML
This commit is contained in:
Kenneth Gitere 2020-10-22 19:10:11 +03:00
parent 6aef1631e3
commit be48cc1e47
6 changed files with 14 additions and 5 deletions

2
Cargo.lock generated
View file

@ -1010,7 +1010,7 @@ dependencies = [
[[package]]
name = "paperoni"
version = "0.1.0"
version = "0.1.0-alpha1"
dependencies = [
"async-std",
"epub-builder",

View file

@ -1,6 +1,9 @@
[package]
description = "A web article downloader"
homepage = "https://github.com/hipstermojo/paperoni"
repository = "https://github.com/hipstermojo/paperoni"
name = "paperoni"
version = "0.1.0"
version = "0.1.0-alpha1"
authors = ["Kenneth Gitere <gitere81@gmail.com>"]
edition = "2018"
license = "MIT"

View file

@ -1,6 +1,6 @@
<img src="./paperoni-dark.png" width="400" style="display: block;margin-left: auto; margin-right: auto;">
<p align="center"><img src="./paperoni-dark.png" width="400"></p>
<p style="text-align:center;"><i>Salami not included</i></p>
<p align="center"><i>Salami not included</i></p>
Paperoni is a web article downloader written in Rust. The downloaded articles are then exported as EPUB files.

View file

@ -65,6 +65,8 @@ fn download(urls: Vec<String>) {
.serialize(&mut html_buf)
.expect("Unable to serialize");
let html_buf = std::str::from_utf8(&html_buf).unwrap();
let html_buf = moz_readability::regexes::REPLACE_SELF_CLOSING_REGEX
.replace_all(html_buf, "$tag/>");
let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
if let Some(author) = extractor.metadata().byline() {
epub.metadata("author", author).unwrap();

View file

@ -46,7 +46,7 @@ const DATA_TABLE_DESCENDANTS: [&str; 5] = ["col", "colgroup", "tfoot", "thead",
// TODO: Change to HashSet
const DEPRECATED_SIZE_ATTRIBUTE_ELEMS: [&str; 5] = ["table", "th", "td", "hr", "pre"];
mod regexes;
pub mod regexes;
pub struct Readability {
root_node: NodeRef,

View file

@ -132,4 +132,8 @@ lazy_static! {
/// Case-insensitive pattern that matches an optional run of non-separator
/// characters followed by one separator character (`|`, `-`, `\`, `/`, `>`
/// or `»`), and captures whatever follows that separator (up to the end of
/// the line) in the named group `end`.
// NOTE(review): judging by the name this is presumably used with `replace`
// to keep only the text after the first separator — confirm at the call site.
pub static ref REPLACE_END_SEPARATOR_REGEX: Regex =
Regex::new(r"(?i)[^\|\-\\/>»]*[\|\-\\/>»](?P<end>.*)").unwrap();
/// Matches a run of one or more consecutive separator characters
/// (`|`, `-`, `\`, `/`, `>` or `»`).
pub static ref REPLACE_MULTI_SEPARATOR_REGEX: Regex = Regex::new(r"[\|\-\\/>»]+").unwrap();
/// Matches the opening tag of an HTML void element (`area`, `base`, `br`,
/// `col`, `embed`, `hr`, `img`, `input`, `link`, `meta`, `param`, `source`,
/// `track`, `wbr`) that is not already self-closed.
///
/// Everything up to, but excluding, the closing `>` is captured in the named
/// group `tag`, so replacing a match with `$tag/>` yields the self-closed
/// form required by XHTML.
///
/// Each attribute must be introduced by a single space and is either
/// `name="value"` / `name='value'` or a bare `name`. Names are limited to
/// lowercase ASCII letters and `-`; values must be non-empty and consist of
/// whitespace, ASCII alphanumerics and `. / - _ #`. Tags with any other
/// attribute shape are left untouched.
// The bare-attribute alternative must start with a space as well: without it
// the pattern treats trailing letters as part of the tag, so `<colgroup>`
// matched as `<col` + `group` and was rewritten to `<colgroup/>`, while a
// genuine bare attribute such as `<input disabled>` was never matched.
pub static ref REPLACE_SELF_CLOSING_REGEX: Regex = Regex::new(
r#"(?P<tag><(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)(?: [a-z\-]+=["'][\sa-zA-Z0-9\./\-_#]+["']| [a-z\-]+)*)>"#
)
.unwrap();
}