Minor fixes in moz_readability
- Swap `unwrap` for an `if let` statement in `get_article_metadata`
- Add a default when extracting the title from a possible `<title>` element
- Fix extracting alternative titles from `h1` tags
This commit is contained in:
parent
dbac7c3b69
commit
960f114dc6
1 changed file with 4 additions and 5 deletions
|
@ -429,8 +429,7 @@ impl Readability {
|
||||||
let mut matches = None;
|
let mut matches = None;
|
||||||
if let Some(property) = node_attr.get("property") {
|
if let Some(property) = node_attr.get("property") {
|
||||||
matches = regexes::PROPERTY_REGEX.captures(property);
|
matches = regexes::PROPERTY_REGEX.captures(property);
|
||||||
if matches.is_some() {
|
if let Some(captures) = &matches {
|
||||||
let captures = matches.as_ref().unwrap();
|
|
||||||
for capture in captures.iter() {
|
for capture in captures.iter() {
|
||||||
let mut name = capture.unwrap().as_str().to_lowercase();
|
let mut name = capture.unwrap().as_str().to_lowercase();
|
||||||
name = regexes::REPLACE_WHITESPACE_REGEX
|
name = regexes::REPLACE_WHITESPACE_REGEX
|
||||||
|
@ -564,7 +563,7 @@ impl Readability {
|
||||||
.root_node
|
.root_node
|
||||||
.select_first("title")
|
.select_first("title")
|
||||||
.map(|title| title.text_contents().trim().to_string())
|
.map(|title| title.text_contents().trim().to_string())
|
||||||
.expect("This file has no <title> tag to extract a title from");
|
.unwrap_or("".to_string());
|
||||||
let orig_title = cur_title.clone();
|
let orig_title = cur_title.clone();
|
||||||
let mut title_had_hierarchical_separators = false;
|
let mut title_had_hierarchical_separators = false;
|
||||||
let word_count = |s: &str| -> usize { s.split_whitespace().count() };
|
let word_count = |s: &str| -> usize { s.split_whitespace().count() };
|
||||||
|
@ -598,8 +597,8 @@ impl Readability {
|
||||||
}
|
}
|
||||||
} else if cur_title.len() > 150 || cur_title.len() < 15 {
|
} else if cur_title.len() > 150 || cur_title.len() < 15 {
|
||||||
let mut h1_nodes = self.root_node.select("h1").unwrap();
|
let mut h1_nodes = self.root_node.select("h1").unwrap();
|
||||||
let (_, h1_count) = h1_nodes.size_hint();
|
let h1_count = self.root_node.select("h1").unwrap().count();
|
||||||
if Some(1) == h1_count {
|
if h1_count == 1 {
|
||||||
cur_title = Self::get_inner_text(h1_nodes.next().unwrap().as_node(), None);
|
cur_title = Self::get_inner_text(h1_nodes.next().unwrap().as_node(), None);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Reference in a new issue