Fix alignment in README
Update manifest file. Add fix to the serialized file so that void elements use self-closing tags, since leaving them unclosed is invalid XHTML.
This commit is contained in:
parent
6aef1631e3
commit
be48cc1e47
6 changed files with 14 additions and 5 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -1010,7 +1010,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "paperoni"
|
||||
version = "0.1.0"
|
||||
version = "0.1.0-alpha1"
|
||||
dependencies = [
|
||||
"async-std",
|
||||
"epub-builder",
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
[package]
|
||||
description = "A web article downloader"
|
||||
homepage = "https://github.com/hipstermojo/paperoni"
|
||||
repository = "https://github.com/hipstermojo/paperoni"
|
||||
name = "paperoni"
|
||||
version = "0.1.0"
|
||||
version = "0.1.0-alpha1"
|
||||
authors = ["Kenneth Gitere <gitere81@gmail.com>"]
|
||||
edition = "2018"
|
||||
license = "MIT"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
<img src="./paperoni-dark.png" width="400" style="display: block;margin-left: auto; margin-right: auto;">
|
||||
<p align="center"><img src="./paperoni-dark.png" width="400"></p>
|
||||
|
||||
<p style="text-align:center;"><i>Salami not included</i></p>
|
||||
<p align="center"><i>Salami not included</i></p>
|
||||
|
||||
Paperoni is a web article downloader written in Rust. The downloaded articles are then exported as EPUB files.
|
||||
|
||||
|
|
|
@ -65,6 +65,8 @@ fn download(urls: Vec<String>) {
|
|||
.serialize(&mut html_buf)
|
||||
.expect("Unable to serialize");
|
||||
let html_buf = std::str::from_utf8(&html_buf).unwrap();
|
||||
let html_buf = moz_readability::regexes::REPLACE_SELF_CLOSING_REGEX
|
||||
.replace_all(html_buf, "$tag/>");
|
||||
let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
||||
if let Some(author) = extractor.metadata().byline() {
|
||||
epub.metadata("author", author).unwrap();
|
||||
|
|
|
@ -46,7 +46,7 @@ const DATA_TABLE_DESCENDANTS: [&str; 5] = ["col", "colgroup", "tfoot", "thead",
|
|||
// TODO: Change to HashSet
|
||||
const DEPRECATED_SIZE_ATTRIBUTE_ELEMS: [&str; 5] = ["table", "th", "td", "hr", "pre"];
|
||||
|
||||
mod regexes;
|
||||
pub mod regexes;
|
||||
|
||||
pub struct Readability {
|
||||
root_node: NodeRef,
|
||||
|
|
|
@ -132,4 +132,8 @@ lazy_static! {
|
|||
pub static ref REPLACE_END_SEPARATOR_REGEX: Regex =
|
||||
Regex::new(r"(?i)[^\|\-\\/>»]*[\|\-\\/>»](?P<end>.*)").unwrap();
|
||||
pub static ref REPLACE_MULTI_SEPARATOR_REGEX: Regex = Regex::new(r"[\|\-\\/>»]+").unwrap();
|
||||
pub static ref REPLACE_SELF_CLOSING_REGEX: Regex = Regex::new(
|
||||
r#"(?P<tag><(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)(?: [a-z\-]+=["'][\sa-zA-Z0-9\./\-_#]+["']|[a-z\-]+)*)>"#
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
|
Reference in a new issue