From 529704d2278daa7a0acdc183e00bd99d087db227 Mon Sep 17 00:00:00 2001
From: Kenneth Gitere <gitere81@gmail.com>
Date: Fri, 1 May 2020 20:42:41 +0300
Subject: [PATCH] Add test for extract content

---
 src/extractor.rs | 32 ++++++++++++++++++++++++++------
 src/main.rs      |  9 ++++++---
 2 files changed, 32 insertions(+), 9 deletions(-)
diff --git a/src/extractor.rs b/src/extractor.rs
index 9797e45..2f05b21 100644
--- a/src/extractor.rs
+++ b/src/extractor.rs
@@ -1,4 +1,4 @@
-use std::fs::File;
+use std::io::Write;
 
 use kuchiki::{traits::*, NodeRef};
 
@@ -32,7 +32,7 @@ impl Extractor {
         extract_text_from_node(node_ref.as_node())
     }
 
-    pub fn extract_content(&self) {
+    pub fn extract_content<W: Write>(&self, writer: &mut W) {
         // Extract the useful parts of the head section
         let author: Option<String> =
             self.extract_attr_val("meta[name='author']", "content", |author| {
@@ -58,8 +58,10 @@ impl Extractor {
         let meta_attrs = MetaAttr::new(author, description, lang, tags, title);
         dbg!(meta_attrs);
 
+        // Extract the article
+
         let article_ref = self.root_node.select_first("article").unwrap();
-        let mut out_file = File::create("out.html").expect("Can't make file");
+
         for node_ref in article_ref.as_node().descendants() {
             match node_ref.data() {
                 kuchiki::NodeData::Element(..) | kuchiki::NodeData::Text(..) => (),
@@ -70,9 +72,7 @@ impl Extractor {
         for node_ref in article_ref.as_node().children() {
             match node_ref.data() {
                 kuchiki::NodeData::Element(_) => {
-                    node_ref
-                        .serialize(&mut out_file)
-                        .expect("Serialization failed");
+                    node_ref.serialize(writer).expect("Serialization failed");
                 }
 
                 _ => (),
@@ -178,4 +178,24 @@ mod test {
         assert!(h1_text.is_some());
         assert_eq!("Testing Paperoni".to_string(), h1_text.unwrap());
     }
+
+    #[test]
+    fn test_extract_content() {
+        let extracted_html: String = r#"
+            <h1>Starting out</h1>
+            <p>Some Lorem Ipsum text here</p>
+            <p>Observe this picture</p>
+            <img alt="Random image" src="./img.jpg">
+        "#
+        .lines()
+        .map(str::trim)
+        .collect(); // trimmed lines are concatenated with no separator
+
+        let extractor = Extractor::from_html(TEST_HTML);
+        let mut output_string = Vec::new(); // in-memory sink: Vec<u8> implements Write
+        extractor.extract_content(&mut output_string);
+        let output_string = std::str::from_utf8(&output_string).unwrap();
+        assert!(!output_string.is_empty());
+        assert_eq!(extracted_html, output_string);
+    }
 }
diff --git a/src/main.rs b/src/main.rs
index baf9943..a23e29d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,5 @@
+use std::fs::File;
+
 use async_std::task;
 
 mod extractor;
@@ -8,15 +10,16 @@ fn main() {
         let urls = vec![
             "https://saveandrun.com/posts/2020-01-24-generating-mazes-with-haskell-part-1.html",
             "https://saveandrun.com/posts/2020-04-05-querying-pacman-with-datalog.html",
-            "https://saveandrun.com/posts/2020-01-08-working-with-git.html",
             "https://blog.hipstermojo.xyz/posts/redis-orm-preface/",
             "https://vuejsdevelopers.com/2020/03/31/vue-js-form-composition-api/?utm_campaign=xl5&utm_medium=article&utm_source=vuejsnews#adding-validators",
             "https://medium.com/typeforms-engineering-blog/the-beginners-guide-to-oauth-dancing-4b8f3666de10",
             "https://dev.to/steelwolf180/full-stack-development-in-django-3768"
         ];
-        let html = fetch_url(urls[4]).await;
+        let html = fetch_url(urls[5]).await; // dev.to article, the last entry in `urls`
         let extractor = Extractor::from_html(&html);
-        extractor.extract_content();
+        println!("Extracting");
+        let mut out_file = File::create("out.html").expect("Can't make file");
+        extractor.extract_content(&mut out_file);
     });
 }