From 1dc7b3432b9bf1edb457bbd117f5988c25f2db18 Mon Sep 17 00:00:00 2001 From: Kenneth Gitere Date: Tue, 12 Jan 2021 10:21:11 +0300 Subject: [PATCH] Bug fixes The bug fixes include: - `<noscript>` nodes being added to the replaced image when `unwrap_noscript_tags` is called. - Remove `srcset` attribute of `<img>` tags after downloading the image. This prevented readers like Foliate from displaying the downloaded image --- src/extractor.rs | 7 ++++++- src/moz_readability/mod.rs | 5 +---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/extractor.rs b/src/extractor.rs index 9294ae6..2b90e3b 100644 --- a/src/extractor.rs +++ b/src/extractor.rs @@ -68,7 +68,9 @@ impl Extractor { pub async fn download_images(&mut self, article_origin: &Url) -> async_std::io::Result<()> { let mut async_download_tasks = Vec::with_capacity(self.img_urls.len()); self.extract_img_urls(); - println!("Downloading images..."); + if self.img_urls.len() > 0 { + println!("Downloading images..."); + } for img_url in &self.img_urls { let img_url = img_url.0.clone(); let abs_url = get_absolute_url(&img_url, article_origin); @@ -129,6 +131,9 @@ impl Extractor { .expect("Image node does not exist"); let mut img_node = img_ref.attributes.borrow_mut(); *img_node.get_mut("src").unwrap() = img_path.clone(); + // srcset is removed because readers such as Foliate then fail to display + // the image already downloaded and stored in src + img_node.remove("srcset"); self.img_urls.push((img_path, img_mime)); } Ok(()) diff --git a/src/moz_readability/mod.rs b/src/moz_readability/mod.rs index a252d57..7986c2b 100644 --- a/src/moz_readability/mod.rs +++ b/src/moz_readability/mod.rs @@ -193,10 +193,7 @@ impl Readability { .borrow_mut() .insert(attr_name, prev_value.value.clone()); } - // WARN: This assumes `next_element` returns an element node!! 
- let inner_node_child = - Self::next_element(inner_node_ref.first_child(), true); - prev_elem.insert_after(inner_node_child.unwrap()); + prev_elem.insert_after(new_img.as_node().clone()); prev_elem.detach(); } }