Fix bug in `is_probably_visible`

Add fix in `grab_article` when appending nodes. Appending a node internally
detaches it from its current parent, so a loop over the children can end up running only once.
This commit is contained in:
Kenneth Gitere 2020-10-22 11:34:12 +03:00
parent a94798cc95
commit a0f69ccf80

View file

@ -773,7 +773,7 @@ impl Readability {
(if let Some(css_str) = attributes.get("style"){ (if let Some(css_str) = attributes.get("style"){
let style_map = Self::inline_css_str_to_map(css_str); let style_map = Self::inline_css_str_to_map(css_str);
if let Some(display_val) = style_map.get("display") { if let Some(display_val) = style_map.get("display") {
display_val != &"hidden" display_val != &"none"
} else { } else {
true true
} }
@ -782,13 +782,10 @@ impl Readability {
}) })
&& !attributes.contains("hidden") && !attributes.contains("hidden")
// check for "fallback-image" so that wikimedia math images are displayed // check for "fallback-image" so that wikimedia math images are displayed
&& (if let Some(aria_hidden_attr) = attributes.get("aria-hidden"){ &&
aria_hidden_attr.trim() != "true" (!attributes.contains("aria-hidden") ||
} else if let Some(class_str) = attributes.get("class"){ attributes.get("aria-hidden").map(|val| val != "true").unwrap_or(true) ||
class_str.split(" ").collect::<Vec<&str>>().contains(&"fallback-image") attributes.get("class").map(|class_list| class_list.split(" ").collect::<Vec<&str>>().contains(&"fallback-image")).unwrap_or(false))
} else {
true
})
} else { } else {
// Technically, it should not matter what value is returned here // Technically, it should not matter what value is returned here
true true
@ -1766,9 +1763,12 @@ impl Readability {
BTreeMap::new(), BTreeMap::new(),
); );
needed_to_create_top_candidate = true; needed_to_create_top_candidate = true;
page.as_node().children().for_each(|child_node| { let mut page_children = page.as_node().children();
let mut page_child = page_children.next();
while let Some(child_node) = page_child {
page_child = page_children.next();
top_candidate.append(child_node); top_candidate.append(child_node);
}); }
page.as_node().append(top_candidate.clone()); page.as_node().append(top_candidate.clone());
self.initialize_node(&mut top_candidate); self.initialize_node(&mut top_candidate);
} else { } else {
@ -1906,7 +1906,8 @@ impl Readability {
let sibling_score_threshold = (10.0_f32).max(top_candidate_score * 0.2); let sibling_score_threshold = (10.0_f32).max(top_candidate_score * 0.2);
parent_of_top_candidate = top_candidate.parent().unwrap(); parent_of_top_candidate = top_candidate.parent().unwrap();
let siblings = parent_of_top_candidate
let mut siblings = parent_of_top_candidate
.children() .children()
.filter(|node| node.as_element().is_some()); .filter(|node| node.as_element().is_some());
@ -1922,7 +1923,9 @@ impl Readability {
.unwrap(); .unwrap();
(class, score) (class, score)
}; };
for sibling in siblings { let mut next_sibling = siblings.next();
while let Some(sibling) = next_sibling {
next_sibling = siblings.next();
let mut append = false; let mut append = false;
if sibling == top_candidate { if sibling == top_candidate {
append = true; append = true;
@ -2384,7 +2387,7 @@ mod test {
<html> <html>
<body> <body>
<p id="visible">Lorem ipsum dolores</p> <p id="visible">Lorem ipsum dolores</p>
<div id="hidden-div" style="display: hidden"> <div id="hidden-div" style="display: none">
<p>This is hidden and so is the parent</p> <p>This is hidden and so is the parent</p>
</div> </div>
<input value="Some good CSRF token" hidden> <input value="Some good CSRF token" hidden>