From e6f901eb5a8bd2bdfc95fe4424dddb2b3c6d8ff9 Mon Sep 17 00:00:00 2001
From: Kenneth Gitere
Date: Sat, 24 Jul 2021 12:43:40 +0300
Subject: [PATCH] refactor: rename `Extractor` to `Article`

---
 src/epub.rs      | 14 +++++++-------
 src/extractor.rs | 28 ++++++++++++++--------------
 src/html.rs      | 24 ++++++++++++------------
 src/http.rs      | 10 +++++-----
 4 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/src/epub.rs b/src/epub.rs
index 8c280f1..d589ff4 100644
--- a/src/epub.rs
+++ b/src/epub.rs
@@ -8,7 +8,7 @@ use indicatif::{ProgressBar, ProgressStyle};
 use kuchiki::NodeRef;
 use log::{debug, error, info};

-use crate::{cli::AppConfig, errors::PaperoniError, extractor::Extractor};
+use crate::{cli::AppConfig, errors::PaperoniError, extractor::Article};

 lazy_static! {
     static ref ESC_SEQ_REGEX: regex::Regex = regex::Regex::new(r#"(&|<|>|'|")"#).unwrap();
 }

 pub fn generate_epubs(
-    articles: Vec<Extractor>,
+    articles: Vec<Article>,
     app_config: &AppConfig,
     successful_articles_table: &mut Table,
 ) -> Result<(), Vec<PaperoniError>> {
@@ -88,9 +88,9 @@ pub fn generate_epubs(
                     let content_url = format!("article_{}.xhtml", idx);
                     let mut xhtml_buf = Vec::new();
                     let header_level_tocs =
-                        get_header_level_toc_vec(&content_url, article.article());
+                        get_header_level_toc_vec(&content_url, article.node_ref());

-                    serialize_to_xhtml(article.article(), &mut xhtml_buf)?;
+                    serialize_to_xhtml(article.node_ref(), &mut xhtml_buf)?;
                     let xhtml_str = std::str::from_utf8(&xhtml_buf)?;
                     let section_name = article.metadata().title();
                     let mut content = EpubContent::new(&content_url, xhtml_str.as_bytes())
@@ -179,8 +179,8 @@ pub fn generate_epubs(
                 let mut out_file = File::create(&file_name).unwrap();
                 let mut xhtml_buf = Vec::new();
                 let header_level_tocs =
-                    get_header_level_toc_vec("index.xhtml", article.article());
-                serialize_to_xhtml(article.article(), &mut xhtml_buf)
+                    get_header_level_toc_vec("index.xhtml", article.node_ref());
+                serialize_to_xhtml(article.node_ref(), &mut xhtml_buf)
                     .expect("Unable to serialize to xhtml");
                 let xhtml_str = std::str::from_utf8(&xhtml_buf).unwrap();

@@ -269,7 +269,7 @@ fn add_stylesheets(
 }

 //TODO: The type signature of the argument should change as it requires that merged articles create an entirely new Vec of references
-fn generate_appendix(articles: Vec<&Extractor>) -> String {
+fn generate_appendix(articles: Vec<&Article>) -> String {
     let link_tags: String = articles
         .iter()
         .map(|article| {
diff --git a/src/extractor.rs b/src/extractor.rs
index 9df5168..b16373a 100644
--- a/src/extractor.rs
+++ b/src/extractor.rs
@@ -6,18 +6,18 @@ use crate::moz_readability::{MetaData, Readability};
 pub type ResourceInfo = (String, Option<String>);

-pub struct Extractor {
-    article: Option<NodeRef>,
+pub struct Article {
+    node_ref_opt: Option<NodeRef>,
     pub img_urls: Vec<ResourceInfo>,
     readability: Readability,
     pub url: String,
 }

-impl Extractor {
+impl Article {
     /// Create a new instance of an HTML extractor given an HTML string
     pub fn from_html(html_str: &str, url: &str) -> Self {
-        Extractor {
-            article: None,
+        Self {
+            node_ref_opt: None,
             img_urls: Vec::new(),
             readability: Readability::new(html_str),
             url: url.to_string(),
         }
@@ -42,14 +42,14 @@ impl Extractor {
             let doc = kuchiki::parse_html().one(template);
             let body = doc.select_first("body").unwrap();
             body.as_node().append(article_node_ref.clone());
-            self.article = Some(doc);
+            self.node_ref_opt = Some(doc);
         }
         Ok(())
     }

     /// Traverses the DOM tree of the content and retrieves the IMG URLs
     pub fn extract_img_urls(&mut self) {
-        if let Some(content_ref) = &self.article {
+        if let Some(content_ref) = &self.node_ref_opt {
             self.img_urls = content_ref
                 .select("img")
                 .unwrap()
@@ -67,8 +67,8 @@ impl Extractor {
     }

     /// Returns the extracted article [NodeRef]. It should only be called *AFTER* calling parse
-    pub fn article(&self) -> &NodeRef {
-        self.article.as_ref().expect(
+    pub fn node_ref(&self) -> &NodeRef {
+        self.node_ref_opt.as_ref().expect(
             "Article node doesn't exist. This may be because the document has not been parsed",
         )
     }
@@ -112,16 +112,16 @@ mod test {

     #[test]
     fn test_extract_img_urls() {
-        let mut extractor = Extractor::from_html(TEST_HTML, "http://example.com/");
-        extractor
+        let mut article = Article::from_html(TEST_HTML, "http://example.com/");
+        article
             .extract_content()
             .expect("Article extraction failed unexpectedly");
-        extractor.extract_img_urls();
+        article.extract_img_urls();

-        assert!(extractor.img_urls.len() > 0);
+        assert!(article.img_urls.len() > 0);
         assert_eq!(
             vec![("http://example.com/img.jpg".to_string(), None)],
-            extractor.img_urls
+            article.img_urls
         );
     }
 }
diff --git a/src/html.rs b/src/html.rs
index a26fe85..7b761d2 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -14,7 +14,7 @@ use log::{debug, error, info};

 use crate::{
     cli::{self, AppConfig},
     errors::PaperoniError,
-    extractor::Extractor,
+    extractor::Article,
     moz_readability::MetaData,
 };
@@ -29,7 +29,7 @@ const BASE_HTML_TEMPLATE: &str = r#"
 "#;

 pub fn generate_html_exports(
-    articles: Vec<Extractor>,
+    articles: Vec<Article>,
     app_config: &AppConfig,
     successful_articles_table: &mut Table,
 ) -> Result<(), Vec<PaperoniError>> {
@@ -80,7 +80,7 @@ pub fn generate_html_exports(

     for (idx, article) in articles.iter().enumerate() {
         let article_elem = article
-            .article()
+            .node_ref()
             .select_first("div[id=\"readability-page-1\"]")
             .unwrap();

@@ -226,16 +226,16 @@ pub fn generate_html_exports(
             elem_attrs.insert("charset", "UTF-8".into());
         }

-        if let Ok(head_elem) = article.article().select_first("head") {
+        if let Ok(head_elem) = article.node_ref().select_first("head") {
             let head_elem_node = head_elem.as_node();
             head_elem_node.append(utf8_encoding);
         };

-        insert_title_elem(article.article(), article.metadata().title());
-        insert_appendix(article.article(), vec![(article.metadata(), &article.url)]);
-        inline_css(article.article(), app_config);
+        insert_title_elem(article.node_ref(), article.metadata().title());
+        insert_appendix(article.node_ref(), vec![(article.metadata(), &article.url)]);
+        inline_css(article.node_ref(), app_config);

-        article.article().serialize(&mut out_file)?;
+        article.node_ref().serialize(&mut out_file)?;

         Ok(())
     };
@@ -269,7 +269,7 @@ fn create_qualname(name: &str) -> QualName {

 /// Updates the src attribute of `<img>` elements with a base64 encoded string of the image data
 fn update_imgs_base64(
-    article: &Extractor,
+    article: &Article,
     img_url: &str,
     mime_type: &str,
 ) -> Result<(), std::io::Error> {
@@ -279,7 +279,7 @@ fn update_imgs_base64(
     let img_base64_str = format!("data:image:{};base64,{}", mime_type, encode(img_bytes));

     let img_elems = article
-        .article()
+        .node_ref()
         .select(&format!("img[src=\"{}\"]", img_url))
         .unwrap();
     for img_elem in img_elems {
@@ -292,14 +292,14 @@ fn update_imgs_base64(
 }

 /// Updates the src attribute of `<img>` elements to the new `imgs_dir_path` and copies the image to the new file location
-fn update_img_urls(article: &Extractor, imgs_dir_path: &Path) -> Result<(), std::io::Error> {
+fn update_img_urls(article: &Article, imgs_dir_path: &Path) -> Result<(), std::io::Error> {
     let temp_dir = std::env::temp_dir();
     for (img_url, _) in &article.img_urls {
         let (from, to) = (temp_dir.join(img_url), imgs_dir_path.join(img_url));
         info!("Copying {:?} to {:?}", from, to);
         fs::copy(from, to)?;
         let img_elems = article
-            .article()
+            .node_ref()
             .select(&format!("img[src=\"{}\"]", img_url))
             .unwrap();
         for img_elem in img_elems {
diff --git a/src/http.rs b/src/http.rs
index 8707977..15cdb3c 100644
--- a/src/http.rs
+++ b/src/http.rs
@@ -9,7 +9,7 @@ use url::Url;

 use crate::cli::AppConfig;
 use crate::errors::{ErrorKind, ImgError, PaperoniError};
-use crate::extractor::Extractor;
+use crate::extractor::Article;

 type HTMLResource = (String, String);

 pub fn download(
     app_config: &AppConfig,
     bar: &ProgressBar,
     partial_downloads: &mut Vec,
     errors: &mut Vec,
-) -> Vec<Extractor> {
+) -> Vec<Article> {
     task::block_on(async {
         let urls_iter = app_config.urls.iter().map(|url| fetch_html(url));
         let mut responses = stream::from_iter(urls_iter).buffered(app_config.max_conn);
@@ -26,7 +26,7 @@ pub fn download(
             match fetch_result {
                 Ok((url, html)) => {
                     debug!("Extracting {}", &url);
-                    let mut extractor = Extractor::from_html(&html, &url);
+                    let mut extractor = Article::from_html(&html, &url);
                     bar.set_message("Extracting...");
                     match extractor.extract_content() {
                         Ok(_) => {
@@ -185,7 +185,7 @@ async fn process_img_response<'a>(
 }

 pub async fn download_images(
-    extractor: &mut Extractor,
+    extractor: &mut Article,
     article_origin: &Url,
     bar: &ProgressBar,
 ) -> Result<(), Vec> {
@@ -237,7 +237,7 @@ pub async fn download_images(
     let replace_existing_img_src = |img_item: ImgItem| -> (String, Option<String>) {
         let (img_url, img_path, img_mime) = img_item;
         let img_ref = extractor
-            .article()
+            .node_ref()
             .select_first(&format!("img[src='{}']", img_url))
             .expect("Image node does not exist");
         let mut img_node = img_ref.attributes.borrow_mut();
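
For reference, a minimal sketch of how the renamed `Article` type is driven after this refactor, pieced together from the test and call sites shown above. The HTML string and URL are placeholder values, and error handling is reduced to `expect`; this is illustrative code inside the crate, not part of the patch itself.

    // Assumed context: code living inside the paperoni crate after this patch.
    use crate::extractor::Article;

    let html = r#"<html><body><p>Hello <img src="img.jpg"></p></body></html>"#;
    let mut article = Article::from_html(html, "http://example.com/");
    article.extract_content().expect("extraction failed"); // parse the readable content
    article.extract_img_urls();                            // fills `article.img_urls`
    let _doc = article.node_ref();                         // panics if extract_content() was never called
    println!(
        "{} image(s) found in \"{}\"",
        article.img_urls.len(),
        article.metadata().title()
    );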