Update crates and fix bugs

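This commit fixes two bugs:
- <base> elements with "/" as the href, which failed to parse as an
  absolute URL; the href is now resolved against the document URI.
- articles containing an ampersand in the title or author byline, which
  would create corrupted manifest files; ampersands are now escaped as
  "&amp;" before being written to the EPUB metadata.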
This commit is contained in:
Kenneth Gitere 2020-11-23 15:55:58 +03:00
parent ef3efdba81
commit aff4054ca9
3 changed files with 14 additions and 8 deletions

View file

@@ -12,13 +12,13 @@ readme = "README.md"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
async-std = "1.5.0" async-std = "1.7.0"
epub-builder = "0.4.5" epub-builder = "0.4.8"
html5ever = "0.25.1" html5ever = "0.25.1"
kuchiki = "0.8.1" kuchiki = "0.8.1"
lazy_static = "1.3.9" lazy_static = "1.4.0"
md5 = "0.7.0" md5 = "0.7.0"
regex = "1.3.9" regex = "1.4.2"
surf = "2.1.0" surf = "2.1.0"
structopt = { version = "0.3" } structopt = { version = "0.3" }
url = "2.1.1" url = "2.2.0"

View file

@@ -66,9 +66,10 @@ fn download(urls: Vec<String>) {
.replace_all(html_buf, "$tag/>"); .replace_all(html_buf, "$tag/>");
let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap(); let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
if let Some(author) = extractor.metadata().byline() { if let Some(author) = extractor.metadata().byline() {
epub.metadata("author", author).unwrap(); epub.metadata("author", author.replace("&", "&amp;"))
.unwrap();
} }
epub.metadata("title", extractor.metadata().title()) epub.metadata("title", extractor.metadata().title().replace("&", "&amp;"))
.unwrap(); .unwrap();
epub.add_content(EpubContent::new("code.xhtml", html_buf.as_bytes())) epub.add_content(EpubContent::new("code.xhtml", html_buf.as_bytes()))
.unwrap(); .unwrap();

View file

@@ -653,7 +653,12 @@ impl Readability {
}) })
.map(|node_ref| { .map(|node_ref| {
let node_attrs = node_ref.attributes.borrow(); let node_attrs = node_ref.attributes.borrow();
Url::parse(node_attrs.get("href").unwrap()).unwrap() let href = node_attrs.get("href").unwrap();
if href.trim() == "/" {
document_uri.join("/").unwrap()
} else {
Url::parse(href).unwrap()
}
}) })
.next() .next()
.unwrap_or(document_uri.clone()); .unwrap_or(document_uri.clone());