From d50f08b875316e08a8713a0da86e32bbc9c2628b Mon Sep 17 00:00:00 2001 From: KOVACS Tamas Date: Mon, 10 May 2021 01:30:05 +0200 Subject: [PATCH 1/2] moz_readability/mod.rs: add testcase for issue #13 This patch adds a testcase for issue #13, where an img node without a class attribute is automatically assumed to be lazy and its src is replaced. --- src/moz_readability/mod.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/moz_readability/mod.rs b/src/moz_readability/mod.rs index 38236d3..7af37f7 100644 --- a/src/moz_readability/mod.rs +++ b/src/moz_readability/mod.rs @@ -3160,6 +3160,7 @@ characters. For that reason, this

tag could not be a byline because it's too Flowers + "#; @@ -3189,6 +3190,13 @@ characters. For that reason, this

tag could not be a byline because it's too lazy_loaded_attrs.get("data-src"), lazy_loaded_attrs.get("src") ); + + let no_lazy_class = doc.root_node.select_first("#no-lazy-class").unwrap(); + let no_lazy_class_attrs = no_lazy_class.attributes.borrow(); + assert_eq!( + no_lazy_class_attrs.get("src").unwrap(), + "https://image.url/" + ); } #[test] From 7649f6aa183bac6d1b579beb0a5cd16207005f75 Mon Sep 17 00:00:00 2001 From: KOVACS Tamas Date: Mon, 10 May 2021 01:33:12 +0200 Subject: [PATCH 2/2] moz_readability/mod.rs: fix laziness check in fix_lazy_images fix_lazy_images checks whether an img node is lazily loaded. An img is considered lazily loaded if it does not have an src/srcset attribute, or if its class contains the 'lazy' string. If an img is considered lazy, fix_lazy_images will make attempts to replace its src. However, if an img was missing the class attribute, it was incorrectly assumed to be lazy and had its src replaced. Fixes hipstermojo/paperoni#13 --- src/moz_readability/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/moz_readability/mod.rs b/src/moz_readability/mod.rs index 7af37f7..705fa55 100644 --- a/src/moz_readability/mod.rs +++ b/src/moz_readability/mod.rs @@ -1248,8 +1248,7 @@ impl Readability { let srcset = node_attr.get("srcset"); let class = node_attr.get("class"); if (src.is_some() || srcset.is_some()) - && class.is_some() - && !class.unwrap().contains("lazy") + && class.and_then(|classname| classname.find("lazy")).is_none() { continue; }