Fix alignment in README
Update manifest file. Add a fix so that void elements in the serialized file use self-closing tags, since leaving them unclosed is invalid XHTML.
This commit is contained in:
parent
6aef1631e3
commit
be48cc1e47
6 changed files with 14 additions and 5 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -1010,7 +1010,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "paperoni"
|
name = "paperoni"
|
||||||
version = "0.1.0"
|
version = "0.1.0-alpha1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-std",
|
"async-std",
|
||||||
"epub-builder",
|
"epub-builder",
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
[package]
|
[package]
|
||||||
|
description = "A web article downloader"
|
||||||
|
homepage = "https://github.com/hipstermojo/paperoni"
|
||||||
|
repository = "https://github.com/hipstermojo/paperoni"
|
||||||
name = "paperoni"
|
name = "paperoni"
|
||||||
version = "0.1.0"
|
version = "0.1.0-alpha1"
|
||||||
authors = ["Kenneth Gitere <gitere81@gmail.com>"]
|
authors = ["Kenneth Gitere <gitere81@gmail.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
<img src="./paperoni-dark.png" width="400" style="display: block;margin-left: auto; margin-right: auto;">
|
<p align="center"><img src="./paperoni-dark.png" width="400"></p>
|
||||||
|
|
||||||
<p style="text-align:center;"><i>Salami not included</i></p>
|
<p align="center"><i>Salami not included</i></p>
|
||||||
|
|
||||||
Paperoni is a web article downloader written in Rust. The downloaded articles are then exported as EPUB files.
|
Paperoni is a web article downloader written in Rust. The downloaded articles are then exported as EPUB files.
|
||||||
|
|
||||||
|
|
|
@ -65,6 +65,8 @@ fn download(urls: Vec<String>) {
|
||||||
.serialize(&mut html_buf)
|
.serialize(&mut html_buf)
|
||||||
.expect("Unable to serialize");
|
.expect("Unable to serialize");
|
||||||
let html_buf = std::str::from_utf8(&html_buf).unwrap();
|
let html_buf = std::str::from_utf8(&html_buf).unwrap();
|
||||||
|
let html_buf = moz_readability::regexes::REPLACE_SELF_CLOSING_REGEX
|
||||||
|
.replace_all(html_buf, "$tag/>");
|
||||||
let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
||||||
if let Some(author) = extractor.metadata().byline() {
|
if let Some(author) = extractor.metadata().byline() {
|
||||||
epub.metadata("author", author).unwrap();
|
epub.metadata("author", author).unwrap();
|
||||||
|
|
|
@ -46,7 +46,7 @@ const DATA_TABLE_DESCENDANTS: [&str; 5] = ["col", "colgroup", "tfoot", "thead",
|
||||||
// TODO: Change to HashSet
|
// TODO: Change to HashSet
|
||||||
const DEPRECATED_SIZE_ATTRIBUTE_ELEMS: [&str; 5] = ["table", "th", "td", "hr", "pre"];
|
const DEPRECATED_SIZE_ATTRIBUTE_ELEMS: [&str; 5] = ["table", "th", "td", "hr", "pre"];
|
||||||
|
|
||||||
mod regexes;
|
pub mod regexes;
|
||||||
|
|
||||||
pub struct Readability {
|
pub struct Readability {
|
||||||
root_node: NodeRef,
|
root_node: NodeRef,
|
||||||
|
|
|
@ -132,4 +132,8 @@ lazy_static! {
|
||||||
pub static ref REPLACE_END_SEPARATOR_REGEX: Regex =
|
pub static ref REPLACE_END_SEPARATOR_REGEX: Regex =
|
||||||
Regex::new(r"(?i)[^\|\-\\/>»]*[\|\-\\/>»](?P<end>.*)").unwrap();
|
Regex::new(r"(?i)[^\|\-\\/>»]*[\|\-\\/>»](?P<end>.*)").unwrap();
|
||||||
pub static ref REPLACE_MULTI_SEPARATOR_REGEX: Regex = Regex::new(r"[\|\-\\/>»]+").unwrap();
|
pub static ref REPLACE_MULTI_SEPARATOR_REGEX: Regex = Regex::new(r"[\|\-\\/>»]+").unwrap();
|
||||||
|
pub static ref REPLACE_SELF_CLOSING_REGEX: Regex = Regex::new(
|
||||||
|
r#"(?P<tag><(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)(?: [a-z\-]+=["'][\sa-zA-Z0-9\./\-_#]+["']|[a-z\-]+)*)>"#
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
Reference in a new issue