Bug fixes

The bug fixes include:
- Stop `<html>` nodes from being added to the replaced image when
  `unwrap_noscript_tags` is called.
- Remove the `srcset` attribute of `<img>` tags after downloading the image.
  A leftover `srcset` prevented readers like Foliate from displaying the
  downloaded image.
This commit is contained in:
Kenneth Gitere 2021-01-12 10:21:11 +03:00
parent 8407c613df
commit 1dc7b3432b
2 changed files with 7 additions and 5 deletions

View file

@ -68,7 +68,9 @@ impl Extractor {
pub async fn download_images(&mut self, article_origin: &Url) -> async_std::io::Result<()> {
let mut async_download_tasks = Vec::with_capacity(self.img_urls.len());
self.extract_img_urls();
println!("Downloading images...");
if self.img_urls.len() > 0 {
println!("Downloading images...");
}
for img_url in &self.img_urls {
let img_url = img_url.0.clone();
let abs_url = get_absolute_url(&img_url, article_origin);
@ -129,6 +131,9 @@ impl Extractor {
.expect("Image node does not exist");
let mut img_node = img_ref.attributes.borrow_mut();
*img_node.get_mut("src").unwrap() = img_path.clone();
// srcset is removed because readers such as Foliate then fail to display
// the image already downloaded and stored in src
img_node.remove("srcset");
self.img_urls.push((img_path, img_mime));
}
Ok(())

View file

@ -193,10 +193,7 @@ impl Readability {
.borrow_mut()
.insert(attr_name, prev_value.value.clone());
}
// WARN: This assumes `next_element` returns an element node!!
let inner_node_child =
Self::next_element(inner_node_ref.first_child(), true);
prev_elem.insert_after(inner_node_child.unwrap());
prev_elem.insert_after(new_img.as_node().clone());
prev_elem.detach();
}
}