Fix bug in `is_probably_visible`: compare the inline `display` style against `none` instead of `hidden`
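Fix `grab_article` when appending nodes. Appending a node internally detaches it from its current parent, so a loop that iterates over the parent's children while appending them can end up running only once. The next child is now fetched before the current one is appended.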
This commit is contained in:
parent
a94798cc95
commit
a0f69ccf80
1 changed file with 16 additions and 13 deletions
|
@ -773,7 +773,7 @@ impl Readability {
|
||||||
(if let Some(css_str) = attributes.get("style"){
|
(if let Some(css_str) = attributes.get("style"){
|
||||||
let style_map = Self::inline_css_str_to_map(css_str);
|
let style_map = Self::inline_css_str_to_map(css_str);
|
||||||
if let Some(display_val) = style_map.get("display") {
|
if let Some(display_val) = style_map.get("display") {
|
||||||
display_val != &"hidden"
|
display_val != &"none"
|
||||||
} else {
|
} else {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
@ -782,13 +782,10 @@ impl Readability {
|
||||||
})
|
})
|
||||||
&& !attributes.contains("hidden")
|
&& !attributes.contains("hidden")
|
||||||
// check for "fallback-image" so that wikimedia math images are displayed
|
// check for "fallback-image" so that wikimedia math images are displayed
|
||||||
&& (if let Some(aria_hidden_attr) = attributes.get("aria-hidden"){
|
&&
|
||||||
aria_hidden_attr.trim() != "true"
|
(!attributes.contains("aria-hidden") ||
|
||||||
} else if let Some(class_str) = attributes.get("class"){
|
attributes.get("aria-hidden").map(|val| val != "true").unwrap_or(true) ||
|
||||||
class_str.split(" ").collect::<Vec<&str>>().contains(&"fallback-image")
|
attributes.get("class").map(|class_list| class_list.split(" ").collect::<Vec<&str>>().contains(&"fallback-image")).unwrap_or(false))
|
||||||
} else {
|
|
||||||
true
|
|
||||||
})
|
|
||||||
} else {
|
} else {
|
||||||
// Technically, it should not matter what value is returned here
|
// Technically, it should not matter what value is returned here
|
||||||
true
|
true
|
||||||
|
@ -1766,9 +1763,12 @@ impl Readability {
|
||||||
BTreeMap::new(),
|
BTreeMap::new(),
|
||||||
);
|
);
|
||||||
needed_to_create_top_candidate = true;
|
needed_to_create_top_candidate = true;
|
||||||
page.as_node().children().for_each(|child_node| {
|
let mut page_children = page.as_node().children();
|
||||||
|
let mut page_child = page_children.next();
|
||||||
|
while let Some(child_node) = page_child {
|
||||||
|
page_child = page_children.next();
|
||||||
top_candidate.append(child_node);
|
top_candidate.append(child_node);
|
||||||
});
|
}
|
||||||
page.as_node().append(top_candidate.clone());
|
page.as_node().append(top_candidate.clone());
|
||||||
self.initialize_node(&mut top_candidate);
|
self.initialize_node(&mut top_candidate);
|
||||||
} else {
|
} else {
|
||||||
|
@ -1906,7 +1906,8 @@ impl Readability {
|
||||||
|
|
||||||
let sibling_score_threshold = (10.0_f32).max(top_candidate_score * 0.2);
|
let sibling_score_threshold = (10.0_f32).max(top_candidate_score * 0.2);
|
||||||
parent_of_top_candidate = top_candidate.parent().unwrap();
|
parent_of_top_candidate = top_candidate.parent().unwrap();
|
||||||
let siblings = parent_of_top_candidate
|
|
||||||
|
let mut siblings = parent_of_top_candidate
|
||||||
.children()
|
.children()
|
||||||
.filter(|node| node.as_element().is_some());
|
.filter(|node| node.as_element().is_some());
|
||||||
|
|
||||||
|
@ -1922,7 +1923,9 @@ impl Readability {
|
||||||
.unwrap();
|
.unwrap();
|
||||||
(class, score)
|
(class, score)
|
||||||
};
|
};
|
||||||
for sibling in siblings {
|
let mut next_sibling = siblings.next();
|
||||||
|
while let Some(sibling) = next_sibling {
|
||||||
|
next_sibling = siblings.next();
|
||||||
let mut append = false;
|
let mut append = false;
|
||||||
if sibling == top_candidate {
|
if sibling == top_candidate {
|
||||||
append = true;
|
append = true;
|
||||||
|
@ -2384,7 +2387,7 @@ mod test {
|
||||||
<html>
|
<html>
|
||||||
<body>
|
<body>
|
||||||
<p id="visible">Lorem ipsum dolores</p>
|
<p id="visible">Lorem ipsum dolores</p>
|
||||||
<div id="hidden-div" style="display: hidden">
|
<div id="hidden-div" style="display: none">
|
||||||
<p>This is hidden and so is the parent</p>
|
<p>This is hidden and so is the parent</p>
|
||||||
</div>
|
</div>
|
||||||
<input value="Some good CSRF token" hidden>
|
<input value="Some good CSRF token" hidden>
|
||||||
|
|
Reference in a new issue