commit 3958261cda
16 changed files with 746 additions and 148 deletions
.gitignore (vendored, 3 changes)
@@ -1,4 +1,7 @@
 /target
 *.epub
+# Only ignore top level html files which may be made when testing
+/*.html
 *.pdf
+*.log
 .vscode/
Cargo.lock (generated, 10 changes)
@@ -395,6 +395,7 @@ dependencies = [
  "textwrap",
  "unicode-width",
  "vec_map",
+ "yaml-rust",
 ]

 [[package]]
@@ -1551,9 +1552,10 @@ dependencies = [

 [[package]]
 name = "paperoni"
-version = "0.5.0-alpha1"
+version = "0.6.0-alpha1"
 dependencies = [
  "async-std",
+ "base64",
  "chrono",
  "clap",
  "colored",
@@ -2756,6 +2758,12 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

+[[package]]
+name = "yaml-rust"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e66366e18dc58b46801afbf2ca7661a9f59cc8c5962c29892b6039b4f86fa992"
+
 [[package]]
 name = "yansi"
 version = "0.5.0"
Cargo.toml
@@ -3,7 +3,7 @@ description = "A web article downloader"
 homepage = "https://github.com/hipstermojo/paperoni"
 repository = "https://github.com/hipstermojo/paperoni"
 name = "paperoni"
-version = "0.5.0-alpha1"
+version = "0.6.0-alpha1"
 authors = ["Kenneth Gitere <gitere81@gmail.com>"]
 edition = "2018"
 license = "MIT"
@@ -14,8 +14,9 @@ readme = "README.md"
 [dependencies]
 # atty = "0.2.14"
 async-std = "1.9.0"
+base64 = "0.13.0"
 chrono = "0.4.19"
-clap = "2.33.3"
+clap = { version = "2.33.3", features = ["yaml"] }
 colored = "2.0.0"
 comfy-table = "3.0.0"
 derive_builder = "0.10.2"
README.md (108 changes)
@@ -8,7 +8,7 @@
 </a>
 </div>

-Paperoni is a CLI tool made in Rust for downloading web articles as EPUBs. There is provisional<sup><a href="#pdf-exports">\*</a></sup> support for exporting to PDF as well.
+Paperoni is a CLI tool made in Rust for downloading web articles as EPUB or HTML files. There is provisional<sup><a href="#pdf-exports">\*</a></sup> support for exporting to PDF as well.

 > This project is in an alpha release so it might crash when you use it. Please open an [issue on Github](https://github.com/hipstermojo/paperoni/issues/new) if it does crash.

@@ -23,7 +23,7 @@ Check the [releases](https://github.com/hipstermojo/paperoni/releases) page for
 Paperoni is published on [crates.io](https://crates.io). If you have [cargo](https://github.com/rust-lang/cargo) installed, then run:

 ```sh
-cargo install paperoni --version 0.5.0-alpha1
+cargo install paperoni --version 0.6.0-alpha1
 ```

 _Paperoni is still in alpha so the `version` flag has to be passed._

@@ -48,39 +48,54 @@ USAGE:
     paperoni [OPTIONS] [urls]...

 OPTIONS:
+        --export <type>
+            Specify the file type of the export. The type must be in lower case. [default: epub] [possible values:
+            html, epub]
     -f, --file <file>
             Input file containing links

     -h, --help
             Prints help information

+        --inline-images
+            Inlines the article images when exporting to HTML using base64.
+            This is used when you do not want a separate folder created for images during HTML export.
+            NOTE: It uses base64 encoding on the images which results in larger HTML export sizes as each image
+            increases in size by about 25%-33%.
         --inline-toc
-            Add an inlined Table of Contents page at the start of the merged article.
-
+            Add an inlined Table of Contents page at the start of the merged article. This does not affect the Table of Contents navigation
         --log-to-file
            Enables logging of events to a file located in .paperoni/logs with a default log level of debug. Use -v to
            specify the logging level
-        --max-conn <max_conn>
+        --max-conn <max-conn>
             The maximum number of concurrent HTTP connections when downloading articles. Default is 8.
             NOTE: It is advised to use as few connections as needed i.e between 1 and 50. Using more connections can end
             up overloading your network card with too many concurrent requests.
-    -o, --output-dir <output_directory>
-            Directory for saving epub documents
-
-        --merge <output_name>
+        --no-css
+            Removes the stylesheets used in the EPUB generation.
+            The EPUB file will then be laid out based on your e-reader's default stylesheets.
+            Images and code blocks may overflow when this flag is set and layout of generated
+            PDFs will be affected. Use --no-header-css if you want to only disable the styling on headers.
+        --no-header-css
+            Removes the header CSS styling but preserves styling of images and codeblocks. To remove all the default
+            CSS, use --no-css instead.
+        --merge <output-name>
             Merge multiple articles into a single epub that will be given the name provided

+    -o, --output-dir <output_directory>
+            Directory to store output epub documents
+
     -V, --version
             Prints version information

     -v
             This takes upto 4 levels of verbosity in the following order.
-             - Error (-v)
-             - Warn (-vv)
-             - Info (-vvv)
-             - Debug (-vvvv)
-            When this flag is passed, it disables the progress bars and logs to stderr.
-            If you would like to send the logs to a file (and enable progress bars), pass the log-to-file flag.
+            - Error (-v)
+            - Warn (-vv)
+            - Info (-vvv)
+            - Debug (-vvvv)
+            When this flag is passed, it disables the progress bars and logs to stderr.
+            If you would like to send the logs to a file (and enable progress bars), pass the log-to-file flag.

 ARGS:
     <urls>...

@@ -112,6 +127,41 @@ These can also be read from a file using the `-f/--file` flag.
 paperoni -f links.txt
 ```

+### Exporting articles
+
+By default, Paperoni exports to EPUB files but you can change to HTML by passing the `--export html` flag.
+
+```sh
+paperoni https://en.wikipedia.org/wiki/Pepperoni --export html
+```
+
+HTML exports allow you to read the articles as plain HTML documents on your browser but can also be used to convert to PDF as explained [here](#).
+
+When exporting to HTML, Paperoni will download the article's images to a folder named similar to the article. Therefore the folder structure would look like this for the command ran above:
+
+```
+.
+├── Pepperoni - Wikipedia
+│   ├── 1a9f886e9b58db72e0003a2cd52681d8.png
+│   ├── 216f8a4265a1ceb3f8cfba4c2f9057b1.jpeg
+│   ...
+└── Pepperoni - Wikipedia.html
+```
+
+If you would instead prefer to have the images inlined directly to the HTML export, pass the `inline-images` flag, i.e.:
+
+```sh
+paperoni https://en.wikipedia.org/wiki/Pepperoni --export html --inline-images
+```
+
+This is especially useful when exporting multiple links.
+
+**NOTE**: The inlining of images for HTML exports uses base64 encoding which is known to increase the overall size of images by about 25% to 33%.
+
+### Disabling CSS
+
+The `no-css` and `no-header-css` flags can be used to remove the default styling added by Paperoni. Refer to `--help` to see the usage of the flags.
+
 ### Merging articles

 By default, Paperoni generates an epub file for each link. You can also merge multiple links

@@ -153,7 +203,11 @@ There are also web pages it won't work on in general such as Twitter and Reddit

 ## PDF exports

-As of version 0.5-alpha1, you can now export to PDF using a third party tool. This requires that you install [Calibre](https://calibre-ebook.com/) which comes with a ebook conversion. You can convert the epub to a pdf through the terminal with `ebook-convert`:
+PDF conversion can be done using a third party tool. There are 2 options to do so:
+
+### EPUB to PDF
+
+This requires that you install [Calibre](https://calibre-ebook.com/) which comes with a ebook conversion. You can convert the epub to a pdf through the terminal with `ebook-convert`:

 ```sh
 # Assuming the downloaded epub was called foo.epub

@@ -161,3 +215,25 @@ ebook-convert foo.epub foo.pdf
 ```

 Alternatively, you can use the Calibre GUI to do the file conversion.
+
+### HTML to PDF
+
+The recommended approach is to use [Weasyprint](https://weasyprint.org/start/), a free and open-source tool that converts HTML documents to PDF. It is available on Linux, MacOS and Windows. Using the CLI, it can be done as follows:
+
+```sh
+paperoni https://en.wikipedia.org/wiki/Pepperoni --export html
+weasyprint "Pepperoni - Wikipedia.html" Pepperoni.pdf
+```
+
+Inlining images is not mandatory as Weasyprint will be able to find the files on its own.
+
+### Comparison of PDF conversion methods
+
+Either of the conversion methods is sufficient for most use cases. The main differences are listed below:
+
+| | EPUB to PDF | HTML to PDF |
+|----------------------|-----------------|------------------|
+| Wrapping code blocks | Yes | No |
+| CSS customization | No | Yes |
+| Generated file size | Slightly larger | Slightly smaller |
+
+The difference in file size is due to the additional fonts added to the PDF file by `ebook-convert`.
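A note on the "25%-33%" figure in the README text above: it is plain base64 arithmetic, since every 3 bytes of image data become 4 encoded characters. A minimal standalone sketch using the `base64 = "0.13.0"` crate this commit adds (illustrative, not part of the commit):

```rust
// base64 expands data by a factor of 4/3, which is where the README's
// "about 25% to 33%" size increase for inlined images comes from.
fn main() {
    let img_bytes = vec![0u8; 3_000]; // stand-in for a 3 KB image
    let encoded = base64::encode(&img_bytes);
    println!("{} bytes -> {} chars", img_bytes.len(), encoded.len()); // 3000 -> 4000
}
```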
src/assets/body.min.css (vendored, new file, 7 lines)
@@ -0,0 +1,7 @@
+/*!
+ * Writ v1.0.4
+ *
+ * Copyright © 2015, Curtis McEnroe <curtis@cmcenroe.me>
+ *
+ * https://cmcenroe.me/writ/LICENSE (ISC)
+ */dd,hr,ol ol,ol ul,ul ol,ul ul{margin:0}pre,table{overflow-x:auto}a,ins{text-decoration:none}html{font-family:Georgia,Lucida Bright,Book Antiqua,serif;font-size:16px;line-height:1.5rem}code,kbd,pre,samp{font-family:Fira Code,Liberation Mono,Menlo,Courier,monospace;font-size:.833rem;color:#111}kbd{font-weight:700}small{font-size:.833em}th{font-weight:400}blockquote,dl,ol,p,pre,table,ul{margin:1.5rem 0 0}pre,table{margin-bottom:-1px}hr{border:none;padding:1.5rem 0 0}table{line-height:calc(1.5rem - 1px);width:100%;border-collapse:collapse}pre{margin-top:calc(1.5rem - 1px)}body{color:#222;margin:1.5rem 1ch}a,a code,header nav a:visited{color:#00e}a:visited,a:visited code{color:#60b}mark{color:inherit;background-color:#fe0}code,pre,samp,tfoot,thead{background-color:rgba(0,0,0,.05)}blockquote,ins,main aside{border:rgba(0,0,0,.05) solid}blockquote,main aside{border-width:0 0 0 .5ch}code,pre,samp{border:rgba(0,0,0,.1) solid}td,th{border:solid #dbdbdb}body>header{text-align:center}body>footer,main{display:block;max-width:78ch;margin:auto}main aside,main figure{float:right;margin:1.5rem 0 0 1ch}main aside{max-width:26ch;padding:0 0 0 .5ch}blockquote{margin-right:3ch;margin-left:1.5ch;padding:0 0 0 1ch}pre{border-width:1px;border-radius:2px;padding:0 .5ch}pre code{border:none;padding:0;background-color:transparent;white-space:inherit}code,ins,samp,td,th{border-width:1px}img{max-width:100%}dd,ol,ul{padding:0 0 0 3ch}ul>li{list-style-type:disc}li ul>li{list-style-type:circle}li li ul>li{list-style-type:square}ol>li{list-style-type:decimal}li ol>li{list-style-type:lower-roman}li li ol>li{list-style-type:lower-alpha}nav ul{padding:0;list-style-type:none}nav ul li{display:inline;padding-left:1ch;white-space:nowrap}nav ul li:first-child{padding-left:0}ins,mark{padding:1px}td,th{padding:0 .5ch}sub,sup{font-size:.75em;line-height:1em}code,samp{border-radius:2px;padding:.1em .2em;white-space:nowrap}
src/assets/headers.min.css (vendored, new file, 7 lines)
@@ -0,0 +1,7 @@
+/*!
+ * Writ v1.0.4
+ *
+ * Copyright © 2015, Curtis McEnroe <curtis@cmcenroe.me>
+ *
+ * https://cmcenroe.me/writ/LICENSE (ISC)
+ */h1,h2,h3,h4,h5,h6,th{font-weight:400}h1{font-size:2.488em}h2{font-size:2.074em}h3{font-size:1.728em}h4{font-size:1.44em}h5{font-size:1.2em}h6{font-size:1em}h1,h2,h3{line-height:3rem}h1,h2,h3,h4,h5,h6{margin:1.5rem 0 0}
src/assets/writ.min.css (vendored, 7 deletions)
@@ -1,7 +0,0 @@
-/*!
- * Writ v1.0.4
- *
- * Copyright © 2015, Curtis McEnroe <curtis@cmcenroe.me>
- *
- * https://cmcenroe.me/writ/LICENSE (ISC)
- */dd,hr,ol ol,ol ul,ul ol,ul ul{margin:0}pre,table{overflow-x:auto}a,ins{text-decoration:none}html{font-family:Georgia,Lucida Bright,Book Antiqua,serif;font-size:16px;line-height:1.5rem}code,kbd,pre,samp{font-family:Fira Code,Liberation Mono,Menlo,Courier,monospace;font-size:.833rem;color:#111}kbd{font-weight:700}h1,h2,h3,h4,h5,h6,th{font-weight:400}h1{font-size:2.488em}h2{font-size:2.074em}h3{font-size:1.728em}h4{font-size:1.44em}h5{font-size:1.2em}h6{font-size:1em}small{font-size:.833em}h1,h2,h3{line-height:3rem}blockquote,dl,h1,h2,h3,h4,h5,h6,ol,p,pre,table,ul{margin:1.5rem 0 0}pre,table{margin-bottom:-1px}hr{border:none;padding:1.5rem 0 0}table{line-height:calc(1.5rem - 1px);width:100%;border-collapse:collapse}pre{margin-top:calc(1.5rem - 1px)}body{color:#222;margin:1.5rem 1ch}a,a code,header nav a:visited{color:#00e}a:visited,a:visited code{color:#60b}mark{color:inherit;background-color:#fe0}code,pre,samp,tfoot,thead{background-color:rgba(0,0,0,.05)}blockquote,ins,main aside{border:rgba(0,0,0,.05) solid}blockquote,main aside{border-width:0 0 0 .5ch}code,pre,samp{border:rgba(0,0,0,.1) solid}td,th{border:solid #dbdbdb}body>header{text-align:center}body>footer,main{display:block;max-width:78ch;margin:auto}main aside,main figure{float:right;margin:1.5rem 0 0 1ch}main aside{max-width:26ch;padding:0 0 0 .5ch}blockquote{margin-right:3ch;margin-left:1.5ch;padding:0 0 0 1ch}pre{border-width:1px;border-radius:2px;padding:0 .5ch}pre code{border:none;padding:0;background-color:transparent;white-space:inherit}code,ins,samp,td,th{border-width:1px}img{max-width:100%}dd,ol,ul{padding:0 0 0 3ch}ul>li{list-style-type:disc}li ul>li{list-style-type:circle}li li ul>li{list-style-type:square}ol>li{list-style-type:decimal}li ol>li{list-style-type:lower-roman}li li ol>li{list-style-type:lower-alpha}nav ul{padding:0;list-style-type:none}nav ul li{display:inline;padding-left:1ch;white-space:nowrap}nav ul li:first-child{padding-left:0}ins,mark{padding:1px}td,th{padding:0 .5ch}sub,sup{font-size:.75em;line-height:1em}code,samp{border-radius:2px;padding:.1em .2em;white-space:nowrap}
src/cli.rs (132 changes)
@@ -1,7 +1,7 @@
 use std::{fs, num::NonZeroUsize, path::Path};

 use chrono::{DateTime, Local};
-use clap::{App, AppSettings, Arg, ArgMatches};
+use clap::{load_yaml, App, ArgMatches};
 use flexi_logger::LevelFilter as LogLevel;
 use itertools::Itertools;

@@ -11,10 +11,10 @@ const DEFAULT_MAX_CONN: usize = 8;

 #[derive(derive_builder::Builder)]
 pub struct AppConfig {
-    /// Urls for store in epub
+    /// Article urls
     pub urls: Vec<String>,
     pub max_conn: usize,
-    /// Path to file of multiple articles into a single epub
+    /// Path to file of multiple articles into a single article
     pub merged: Option<String>,
     pub output_directory: Option<String>,
     pub log_level: LogLevel,
@@ -22,80 +22,15 @@ pub struct AppConfig {
     pub start_time: DateTime<Local>,
     pub is_logging_to_file: bool,
     pub inline_toc: bool,
+    pub css_config: CSSConfig,
+    pub export_type: ExportType,
+    pub is_inlining_images: bool,
 }

 impl AppConfig {
     pub fn init_with_cli() -> Result<AppConfig, Error> {
-        let app = App::new("paperoni")
-            .settings(&[
-                AppSettings::ArgRequiredElseHelp,
-                AppSettings::UnifiedHelpMessage,
-            ])
-            .version(clap::crate_version!())
-            .about(
-                "Paperoni is a CLI tool made in Rust for downloading web articles as EPUBs",
-            )
-            .arg(
-                Arg::with_name("urls")
-                    .help("Urls of web articles")
-                    .multiple(true),
-            )
-            .arg(
-                Arg::with_name("file")
-                    .short("f")
-                    .long("file")
-                    .help("Input file containing links")
-                    .takes_value(true),
-            )
-            .arg(
-                Arg::with_name("output_directory")
-                    .long("output-dir")
-                    .short("o")
-                    .help("Directory to store output epub documents")
-                    .conflicts_with("output_name")
-                    .takes_value(true),
-            )
-            .arg(
-                Arg::with_name("output_name")
-                    .long("merge")
-                    .help("Merge multiple articles into a single epub")
-                    .long_help("Merge multiple articles into a single epub that will be given the name provided")
-                    .conflicts_with("output_directory")
-                    .takes_value(true),
-            ).arg(
-                Arg::with_name("max-conn")
-                    .long("max_conn")
-                    .help("The maximum number of concurrent HTTP connections when downloading articles. Default is 8")
-                    .long_help("The maximum number of concurrent HTTP connections when downloading articles. Default is 8.\nNOTE: It is advised to use as few connections as needed i.e between 1 and 50. Using more connections can end up overloading your network card with too many concurrent requests.")
-                    .takes_value(true))
-            .arg(
-                Arg::with_name("verbosity")
-                    .short("v")
-                    .multiple(true)
-                    .help("Enables logging of events and set the verbosity level. Use --help to read on its usage")
-                    .long_help(
-                        "This takes upto 4 levels of verbosity in the following order.
- - Error (-v)
- - Warn (-vv)
- - Info (-vvv)
- - Debug (-vvvv)
- When this flag is passed, it disables the progress bars and logs to stderr.
- If you would like to send the logs to a file (and enable progress bars), pass the log-to-file flag."
-                    )
-                    .takes_value(false))
-            .arg(
-                Arg::with_name("log-to-file")
-                    .long("log-to-file")
-                    .help("Enables logging of events to a file located in .paperoni/logs with a default log level of debug. Use -v to specify the logging level")
-                    .takes_value(false))
-            .arg(
-                Arg::with_name("inline-toc")
-                    .long("inline-toc")
-                    .requires("output_name")
-                    .help("Add an inlined Table of Contents page at the start of the merged article.")
-                    .long_help("Add an inlined Table of Contents page at the start of the merged article. This does not affect the Table of Contents navigation")
-            );
-
+        let yaml_config = load_yaml!("cli_config.yml");
+        let app = App::from_yaml(yaml_config).version(clap::crate_version!());
         Self::try_from(app.get_matches())
     }

@@ -159,11 +94,12 @@ impl<'a> TryFrom<ArgMatches<'a>> for AppConfig {
             Some(max_conn) => max_conn.parse::<NonZeroUsize>()?.get(),
             None => DEFAULT_MAX_CONN,
         })
-        .merged(arg_matches.value_of("output_name").map(|name| {
-            if name.ends_with(".epub") {
+        .merged(arg_matches.value_of("output-name").map(|name| {
+            let file_ext = format!(".{}", arg_matches.value_of("export").unwrap());
+            if name.ends_with(&file_ext) {
                 name.to_owned()
             } else {
-                name.to_string() + ".epub"
+                name.to_string() + &file_ext
             }
         }))
         .can_disable_progress_bar(
@@ -183,7 +119,17 @@ impl<'a> TryFrom<ArgMatches<'a>> for AppConfig {
             4..=u64::MAX => LogLevel::Debug,
         })
         .is_logging_to_file(arg_matches.is_present("log-to-file"))
-        .inline_toc(arg_matches.is_present("inline-toc"))
+        .inline_toc(
+            (if arg_matches.is_present("inline-toc") {
+                if arg_matches.value_of("export") == Some("epub") {
+                    Ok(true)
+                } else {
+                    Err(Error::WrongExportInliningToC)
+                }
+            } else {
+                Ok(false)
+            })?,
+        )
         .output_directory(
             arg_matches
                 .value_of("output_directory")
@@ -200,6 +146,25 @@ impl<'a> TryFrom<ArgMatches<'a>> for AppConfig {
                 .transpose()?,
         )
         .start_time(Local::now())
+        .css_config(
+            match (
+                arg_matches.is_present("no-css"),
+                arg_matches.is_present("no-header-css"),
+            ) {
+                (true, _) => CSSConfig::None,
+                (_, true) => CSSConfig::NoHeaders,
+                _ => CSSConfig::All,
+            },
+        )
+        .export_type({
+            let export_type = arg_matches.value_of("export").unwrap();
+            if export_type == "html" {
+                ExportType::HTML
+            } else {
+                ExportType::EPUB
+            }
+        })
+        .is_inlining_images(arg_matches.is_present("inline-images"))
         .try_init()
     }
 }
@@ -212,3 +177,16 @@ impl AppConfigBuilder {
             .init_merge_file()
     }
 }
+
+#[derive(Clone, Debug)]
+pub enum CSSConfig {
+    All,
+    NoHeaders,
+    None,
+}
+
+#[derive(Clone, Debug)]
+pub enum ExportType {
+    HTML,
+    EPUB,
+}
src/cli_config.yml (new file, 82 lines)
@@ -0,0 +1,82 @@
+name: paperoni
+about: Paperoni is a CLI tool made in Rust for downloading web articles as EPUBs
+settings:
+  - ArgRequiredElseHelp
+  - UnifiedHelpMessage
+args:
+  - urls:
+      help: Urls of web articles
+      multiple: true
+  - file:
+      short: f
+      long: file
+      help: Input file containing links
+      takes_value: true
+  - output_directory:
+      short: o
+      long: output-dir
+      help: Directory to store output epub documents
+      conflicts_with: output-name
+      takes_value: true
+  - output-name:
+      long: merge
+      help: Merge multiple articles into a single epub
+      long_help: Merge multiple articles into a single epub that will be given the name provided
+      conflicts_with: output_directory
+      takes_value: true
+  - max-conn:
+      long: max-conn
+      help: The maximum number of concurrent HTTP connections when downloading articles. Default is 8
+      long_help: "The maximum number of concurrent HTTP connections when downloading articles. Default is 8.\nNOTE: It is advised to use as few connections as needed i.e between 1 and 50. Using more connections can end up overloading your network card with too many concurrent requests."
+      takes_value: true
+  - verbosity:
+      short: v
+      multiple: true
+      help: Enables logging of events and set the verbosity level. Use --help to read on its usage
+      long_help: "This takes upto 4 levels of verbosity in the following order.
+        \n- Error (-v)
+        \n- Warn (-vv)
+        \n- Info (-vvv)
+        \n- Debug (-vvvv)
+        \nWhen this flag is passed, it disables the progress bars and logs to stderr.
+        \nIf you would like to send the logs to a file (and enable progress bars), pass the log-to-file flag."
+      takes_value: false
+  - log-to-file:
+      long: log-to-file
+      help: Enables logging of events to a file located in .paperoni/logs with a default log level of debug. Use -v to specify the logging level
+      takes_value: false
+  - inline-toc:
+      long: inline-toc
+      requires: output-name
+      help: Add an inlined Table of Contents page at the start of the merged article.
+      long_help: Add an inlined Table of Contents page at the start of the merged article. This does not affect the Table of Contents navigation
+  - no-css:
+      long: no-css
+      conflicts_with: no-header-css
+      help: Removes the stylesheets used in the EPUB generation. Pass --help to learn more
+      long_help: "Removes the stylesheets used in the EPUB generation.
+        \nThe EPUB file will then be laid out based on your e-reader's default stylesheets.
+        \nImages and code blocks may overflow when this flag is set and layout of generated
+        \nPDFs will be affected. Use --no-header-css if you want to only disable the styling on headers."
+      takes_value: false
+  - no-header-css:
+      long: no-header-css
+      conflicts_with: no-css
+      help: Removes the header CSS styling but preserves styling of images and codeblocks. To remove all the default CSS, use --no-css instead.
+      takes_value: false
+  - export:
+      long: export
+      help: Specify the file type of the export. The type must be in lower case.
+      possible_values: [html, epub]
+      value_name: type
+      takes_value: true
+      default_value: epub
+  - inline-images:
+      long: inline-images
+      help: Inlines the article images when exporting to HTML using base64. Pass --help to learn more.
+      long_help: "Inlines the article images when exporting to HTML using base64.
+        \nThis is used when you do not want a separate folder created for images during HTML export.
+        \nNOTE: It uses base64 encoding on the images which results in larger HTML export sizes as each image
+        increases in size by about 25%-33%."
+      takes_value: false
+      requires: export
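Taken together with the Cargo.toml change that enables clap's `yaml` feature, the YAML file above replaces the builder chain deleted from src/cli.rs: `load_yaml!` embeds and parses the spec at compile time. A minimal sketch of the pattern (hypothetical standalone program, not the commit's code):

```rust
use clap::{load_yaml, App};

fn main() {
    // The path is resolved relative to this source file, as in src/cli.rs.
    let yaml = load_yaml!("cli_config.yml");
    let matches = App::from_yaml(yaml)
        .version(clap::crate_version!())
        .get_matches();

    // Args defined in the YAML are looked up by name, exactly as before.
    if let Some(urls) = matches.values_of("urls") {
        urls.for_each(|url| println!("article url: {}", url));
    }
}
```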
src/epub.rs (63 changes)
@@ -8,14 +8,15 @@ use indicatif::{ProgressBar, ProgressStyle};
 use kuchiki::NodeRef;
 use log::{debug, error, info};

-use crate::{cli::AppConfig, errors::PaperoniError, extractor::Extractor};
+use crate::{cli::AppConfig, errors::PaperoniError, extractor::Article};

 lazy_static! {
     static ref ESC_SEQ_REGEX: regex::Regex = regex::Regex::new(r#"(&|<|>|'|")"#).unwrap();
+    static ref VALID_ATTR_CHARS_REGEX: regex::Regex = regex::Regex::new(r#"[a-z0-9\-_:]"#).unwrap();
 }

 pub fn generate_epubs(
-    articles: Vec<Extractor>,
+    articles: Vec<Article>,
     app_config: &AppConfig,
     successful_articles_table: &mut Table,
 ) -> Result<(), Vec<PaperoniError>> {
@@ -37,8 +38,6 @@ pub fn generate_epubs(
         enabled_bar
     };

-    let stylesheet = include_bytes!("./assets/writ.min.css");
-
     let mut errors: Vec<PaperoniError> = Vec::new();

     match app_config.merged {
@@ -71,7 +70,7 @@ pub fn generate_epubs(
                 epub.inline_toc();
             }

-            match epub.stylesheet(stylesheet.as_bytes()) {
+            match add_stylesheets(&mut epub, app_config) {
                 Ok(_) => (),
                 Err(e) => {
                     error!("Unable to add stylesheets to epub file");
@@ -89,9 +88,9 @@ pub fn generate_epubs(
                 let content_url = format!("article_{}.xhtml", idx);
                 let mut xhtml_buf = Vec::new();
                 let header_level_tocs =
-                    get_header_level_toc_vec(&content_url, article.article());
+                    get_header_level_toc_vec(&content_url, article.node_ref());

-                serialize_to_xhtml(article.article(), &mut xhtml_buf)?;
+                serialize_to_xhtml(article.node_ref(), &mut xhtml_buf)?;
                 let xhtml_str = std::str::from_utf8(&xhtml_buf)?;
                 let section_name = article.metadata().title();
                 let mut content = EpubContent::new(&content_url, xhtml_str.as_bytes())
@@ -146,6 +145,8 @@ pub fn generate_epubs(
                     let mut paperoni_err: PaperoniError = err.into();
                     paperoni_err.set_article_source(&name);
                     errors.push(paperoni_err);
+                    error!("Failed to generate epub: {}", name);
+                    bar.finish_with_message("epub generation failed\n");
                     return Err(errors);
                 }
             }
@@ -178,8 +179,8 @@ pub fn generate_epubs(
                 let mut out_file = File::create(&file_name).unwrap();
                 let mut xhtml_buf = Vec::new();
                 let header_level_tocs =
-                    get_header_level_toc_vec("index.xhtml", article.article());
-                serialize_to_xhtml(article.article(), &mut xhtml_buf)
+                    get_header_level_toc_vec("index.xhtml", article.node_ref());
+                serialize_to_xhtml(article.node_ref(), &mut xhtml_buf)
                     .expect("Unable to serialize to xhtml");
                 let xhtml_str = std::str::from_utf8(&xhtml_buf).unwrap();

@@ -187,8 +188,7 @@ pub fn generate_epubs(
                     epub.metadata("author", replace_escaped_characters(author))?;
                 }

-                epub.stylesheet(stylesheet.as_bytes())?;
-
+                add_stylesheets(&mut epub, app_config)?;
                 let title = replace_escaped_characters(article.metadata().title());
                 epub.metadata("title", &title)?;

@@ -205,7 +205,7 @@ pub fn generate_epubs(
                     let mut file_path = std::env::temp_dir();
                     file_path.push(&img.0);

-                    let img_buf = File::open(&file_path).expect("Can't read file");
+                    let img_buf = File::open(&file_path).expect("Can't read image file");
                     epub.add_resource(
                         file_path.file_name().unwrap(),
                         img_buf,
@@ -249,8 +249,27 @@ fn replace_escaped_characters(value: &str) -> String {
         .replace("&gt;", ">")
 }

+fn add_stylesheets<T: epub_builder::Zip>(
+    epub: &mut EpubBuilder<T>,
+    app_config: &AppConfig,
+) -> Result<(), epub_builder::Error> {
+    let body_stylesheet: &[u8] = include_bytes!("./assets/body.min.css");
+    let header_stylesheet: &[u8] = include_bytes!("./assets/headers.min.css");
+    match app_config.css_config {
+        crate::cli::CSSConfig::All => {
+            epub.stylesheet([header_stylesheet, body_stylesheet].concat().as_bytes())?;
+            Ok(())
+        }
+        crate::cli::CSSConfig::NoHeaders => {
+            epub.stylesheet(body_stylesheet.as_bytes())?;
+            Ok(())
+        }
+        _ => Ok(()),
+    }
+}
+
 //TODO: The type signature of the argument should change as it requires that merged articles create an entirely new Vec of references
-fn generate_appendix(articles: Vec<&Extractor>) -> String {
+fn generate_appendix(articles: Vec<&Article>) -> String {
     let link_tags: String = articles
         .iter()
         .map(|article| {
@@ -292,6 +311,10 @@ fn generate_header_ids(root_node: &NodeRef) {
     let headers_no_id = headers.filter(|node_data_ref| {
         let attrs = node_data_ref.attributes.borrow();
         !attrs.contains("id")
+            || attrs
+                .get("id")
+                .map(|val| !VALID_ATTR_CHARS_REGEX.is_match(&val))
+                .unwrap()
     });
     for header in headers_no_id {
         let mut attrs = header.attributes.borrow_mut();
@@ -410,6 +433,15 @@ fn serialize_to_xhtml<W: std::io::Write>(
     node_ref: &NodeRef,
     mut w: &mut W,
 ) -> Result<(), PaperoniError> {
+    {
+        // Add XHTML attributes
+        let html_elem = node_ref
+            .select_first("html")
+            .expect("Unable to get <html> element in article");
+        let mut html_attrs = html_elem.attributes.borrow_mut();
+        html_attrs.insert("xmlns", "http://www.w3.org/1999/xhtml".into());
+        html_attrs.insert("xmlns:epub", "http://www.idpf.org/2007/ops".into());
+    }
     let mut escape_map = HashMap::new();
     escape_map.insert("<", "&lt;");
     escape_map.insert(">", "&gt;");
@@ -430,7 +462,10 @@ fn serialize_to_xhtml<W: std::io::Write>(
                 let attrs_str = attrs
                     .map
                     .iter()
-                    .filter(|(k, _)| !k.local.contains("\""))
+                    .filter(|(k, _)| {
+                        let attr_key: &str = &k.local;
+                        attr_key.is_ascii() && VALID_ATTR_CHARS_REGEX.is_match(attr_key)
+                    })
                     .map(|(k, v)| {
                         format!(
                             "{}=\"{}\"",
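One subtlety in the new `VALID_ATTR_CHARS_REGEX` above: `is_match` only checks that *some* character of the id or attribute name falls in `[a-z0-9\-_:]`, which is why the serializer pairs it with an `is_ascii()` check. A quick sketch of the behavior (assumes the `regex` crate, as the diff does):

```rust
use regex::Regex;

fn main() {
    let valid_attr_chars = Regex::new(r#"[a-z0-9\-_:]"#).unwrap();
    // Header ids containing at least one valid character pass the filter:
    assert!(valid_attr_chars.is_match("epub:type"));
    assert!(valid_attr_chars.is_match("data-src"));
    // An id with no valid character at all fails, so a fresh id is generated:
    assert!(!valid_attr_chars.is_match("\"><"));
}
```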
src/errors.rs
@@ -156,4 +156,6 @@ pub enum CliError<BuilderError: Debug + Display> {
     OutputDirectoryNotExists,
     #[error("Unable to start logger!\n{0}")]
     LogError(#[from] LogError),
+    #[error("The --inline-toc can only be used exporting to epub")]
+    WrongExportInliningToC,
 }
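The `#[error(...)]` attributes suggest `CliError` derives `Display` via the `thiserror` crate (an assumption; the derive itself sits outside this hunk). A minimal sketch of the pattern the new variant follows:

```rust
use thiserror::Error; // assumed dependency, inferred from the #[error] attributes

#[derive(Debug, Error)]
enum DemoCliError {
    #[error("The --inline-toc can only be used exporting to epub")]
    WrongExportInliningToC,
}

fn main() {
    // Display is generated from the #[error] format string.
    println!("ERROR: {}", DemoCliError::WrongExportInliningToC);
}
```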
src/extractor.rs
@@ -6,18 +6,18 @@ use crate::moz_readability::{MetaData, Readability};

 pub type ResourceInfo = (String, Option<String>);

-pub struct Extractor {
-    article: Option<NodeRef>,
+pub struct Article {
+    node_ref_opt: Option<NodeRef>,
     pub img_urls: Vec<ResourceInfo>,
     readability: Readability,
     pub url: String,
 }

-impl Extractor {
+impl Article {
     /// Create a new instance of an HTML extractor given an HTML string
     pub fn from_html(html_str: &str, url: &str) -> Self {
-        Extractor {
-            article: None,
+        Self {
+            node_ref_opt: None,
             img_urls: Vec::new(),
             readability: Readability::new(html_str),
             url: url.to_string(),
@@ -30,7 +30,8 @@ impl Extractor {
         self.readability.parse(&self.url)?;
         if let Some(article_node_ref) = &self.readability.article_node {
             let template = r#"
-    <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
+    <!DOCTYPE html>
+    <html>
     <head>
     <link rel="stylesheet" href="stylesheet.css" type="text/css"></link>
     </head>
@@ -41,14 +42,14 @@ impl Extractor {
             let doc = kuchiki::parse_html().one(template);
             let body = doc.select_first("body").unwrap();
             body.as_node().append(article_node_ref.clone());
-            self.article = Some(doc);
+            self.node_ref_opt = Some(doc);
         }
         Ok(())
     }

     /// Traverses the DOM tree of the content and retrieves the IMG URLs
     pub fn extract_img_urls(&mut self) {
-        if let Some(content_ref) = &self.article {
+        if let Some(content_ref) = &self.node_ref_opt {
             self.img_urls = content_ref
                 .select("img")
                 .unwrap()
@@ -66,8 +67,8 @@ impl Extractor {
     }

     /// Returns the extracted article [NodeRef]. It should only be called *AFTER* calling parse
-    pub fn article(&self) -> &NodeRef {
-        self.article.as_ref().expect(
+    pub fn node_ref(&self) -> &NodeRef {
+        self.node_ref_opt.as_ref().expect(
             "Article node doesn't exist. This may be because the document has not been parsed",
         )
     }
@@ -111,16 +112,16 @@ mod test {

     #[test]
     fn test_extract_img_urls() {
-        let mut extractor = Extractor::from_html(TEST_HTML, "http://example.com/");
-        extractor
+        let mut article = Article::from_html(TEST_HTML, "http://example.com/");
+        article
             .extract_content()
             .expect("Article extraction failed unexpectedly");
-        extractor.extract_img_urls();
+        article.extract_img_urls();

-        assert!(extractor.img_urls.len() > 0);
+        assert!(article.img_urls.len() > 0);
         assert_eq!(
             vec![("http://example.com/img.jpg".to_string(), None)],
-            extractor.img_urls
+            article.img_urls
         );
     }
 }
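The template swap above (XHTML namespace out, plain `<!DOCTYPE html>` in) works because the XHTML attributes are now added later, in `serialize_to_xhtml` in src/epub.rs. For context, a minimal sketch of the kuchiki pattern the extractor relies on (standalone and illustrative only):

```rust
use kuchiki::traits::TendrilSink;

fn main() {
    // Parse a skeleton document, then graft content into <body>, mirroring
    // what Article::extract_content does with the readability node.
    let doc = kuchiki::parse_html().one("<!DOCTYPE html><html><head></head><body></body></html>");
    let body = doc.select_first("body").unwrap();
    body.as_node()
        .append(kuchiki::NodeRef::new_text("extracted article content"));
    doc.serialize(&mut std::io::stdout()).unwrap();
}
```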
src/html.rs (new file, 391 lines)
@@ -0,0 +1,391 @@
+use std::{
+    collections::{BTreeMap, HashSet},
+    fs::{self, File},
+    path::Path,
+};
+
+use base64::encode;
+use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
+use html5ever::{LocalName, Namespace, QualName};
+use indicatif::{ProgressBar, ProgressStyle};
+use kuchiki::{traits::*, NodeRef};
+use log::{debug, error, info};
+
+use crate::{
+    cli::{self, AppConfig},
+    errors::PaperoniError,
+    extractor::Article,
+    moz_readability::MetaData,
+};
+
+const HEAD_ELEM_NOT_FOUND: &str =
+    "Unable to get <head> element to inline css. Ensure that the root node is the HTML document.";
+const BASE_HTML_TEMPLATE: &str = r#"<!DOCTYPE html>
+<html>
+<head>
+<meta charset="UTF-8">
+</head>
+<body></body>
+</html>"#;
+
+pub fn generate_html_exports(
+    articles: Vec<Article>,
+    app_config: &AppConfig,
+    successful_articles_table: &mut Table,
+) -> Result<(), Vec<PaperoniError>> {
+    if articles.is_empty() {
+        return Ok(());
+    }
+
+    let bar = if app_config.can_disable_progress_bar {
+        ProgressBar::hidden()
+    } else {
+        let enabled_bar = ProgressBar::new(articles.len() as u64);
+        let style = ProgressStyle::default_bar().template(
+            "{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} html {pos}/{len:7} {msg:.green}",
+        );
+        enabled_bar.set_style(style);
+        if !articles.is_empty() {
+            enabled_bar.set_message("Generating html files");
+        }
+        enabled_bar
+    };
+
+    let mut errors: Vec<PaperoniError> = Vec::new();
+
+    match app_config.merged {
+        Some(ref name) => {
+            successful_articles_table.set_header(vec![Cell::new("Table of Contents")
+                .add_attribute(Attribute::Bold)
+                .set_alignment(CellAlignment::Center)
+                .fg(Color::Green)]);
+
+            debug!("Creating {:?}", name);
+
+            let base_html_elem = kuchiki::parse_html().one(BASE_HTML_TEMPLATE);
+            let body_elem = base_html_elem.select_first("body").unwrap();
+            let base_path = Path::new(app_config.output_directory.as_deref().unwrap_or("."));
+            let img_dirs_path_name = name.trim_end_matches(".html");
+            let imgs_dir_path = base_path.join(img_dirs_path_name);
+
+            if !(app_config.is_inlining_images || imgs_dir_path.exists()) {
+                info!("Creating imgs dir in {:?} for {}", imgs_dir_path, name);
+                if let Err(e) = std::fs::create_dir(&imgs_dir_path) {
+                    error!("Unable to create imgs dir for HTML file");
+                    let err: PaperoniError = e.into();
+                    errors.push(err);
+                    return Err(errors);
+                };
+            }
+
+            for (idx, article) in articles.iter().enumerate() {
+                let article_elem = article
+                    .node_ref()
+                    .select_first("div[id=\"readability-page-1\"]")
+                    .unwrap();
+
+                let title = article.metadata().title();
+
+                let mut elem_attr = article_elem.attributes.borrow_mut();
+                if let Some(id_attr) = elem_attr.get_mut("id") {
+                    *id_attr = format!("readability-page-{}", idx);
+                }
+
+                for (img_url, mime_type_opt) in &article.img_urls {
+                    if app_config.is_inlining_images {
+                        info!("Inlining images for {}", title);
+                        let result = update_imgs_base64(
+                            article,
+                            img_url,
+                            mime_type_opt.as_deref().unwrap_or("image/*"),
+                        );
+
+                        if let Err(e) = result {
+                            let mut err: PaperoniError = e.into();
+                            err.set_article_source(title);
+                            error!("Unable to copy images to imgs dir for {}", title);
+                            errors.push(err);
+                        }
+
+                        info!("Completed inlining images for {}", title);
+                    } else {
+                        info!("Copying images to imgs dir for {}", title);
+                        let result = update_img_urls(article, &imgs_dir_path).map_err(|e| {
+                            let mut err: PaperoniError = e.into();
+                            err.set_article_source(title);
+                            err
+                        });
+                        if let Err(e) = result {
+                            error!("Unable to copy images to imgs dir for {}", title);
+                            errors.push(e);
+                        } else {
+                            info!("Successfully copied images to imgs dir for {}", title);
+                        }
+                    }
+                }
+                bar.inc(1);
+                successful_articles_table.add_row(vec![title]);
+                body_elem.as_node().append(article_elem.as_node().clone());
+                debug!("Added {} to the export HTML file", title);
+            }
+
+            insert_title_elem(&base_html_elem, name);
+            insert_appendix(
+                &base_html_elem,
+                articles
+                    .iter()
+                    .map(|article| (article.metadata(), article.url.as_str()))
+                    .collect(),
+            );
+            inline_css(&base_html_elem, app_config);
+
+            info!("Added title, footer and inlined styles for {}", name);
+
+            info!("Creating export HTML file: {}", name);
+            if let Err(mut err) = File::create(name)
+                .and_then(|mut out_file| base_html_elem.serialize(&mut out_file))
+                .map_err(|e| -> PaperoniError { e.into() })
+            {
+                error!("Failed to serialize articles to file: {}", name);
+                err.set_article_source(&name);
+                errors.push(err);
+                bar.finish_with_message("html generation failed");
+                return Err(errors);
+            };
+
+            bar.finish_with_message("Generated html file\n");
+            debug!("Created {:?}", name);
+            println!("Created {:?}", name);
+        }
+        None => {
+            successful_articles_table
+                .set_header(vec![Cell::new("Downloaded articles")
+                    .add_attribute(Attribute::Bold)
+                    .set_alignment(CellAlignment::Center)
+                    .fg(Color::Green)])
+                .set_content_arrangement(ContentArrangement::Dynamic);
+
+            let mut file_names: HashSet<String> = HashSet::new();
+
+            for article in &articles {
+                let mut file_name = format!(
+                    "{}/{}.html",
+                    app_config.output_directory.as_deref().unwrap_or("."),
+                    article
+                        .metadata()
+                        .title()
+                        .replace("/", " ")
+                        .replace("\\", " ")
+                );
+
+                if file_names.contains(&file_name) {
+                    info!("Article name {:?} already exists", file_name);
+                    file_name = format!(
+                        "{}/{}_{}.html",
+                        app_config.output_directory.as_deref().unwrap_or("."),
+                        article
+                            .metadata()
+                            .title()
+                            .replace("/", " ")
+                            .replace("\\", " "),
+                        file_names.len()
+                    );
+                    info!("Renamed to {:?}", file_name);
+                }
+                file_names.insert(file_name.clone());
+
+                debug!("Creating {:?}", file_name);
+                let export_article = || -> Result<(), PaperoniError> {
+                    let mut out_file = File::create(&file_name)?;
+
+                    if app_config.is_inlining_images {
+                        for (img_url, mime_type_opt) in &article.img_urls {
+                            update_imgs_base64(
+                                article,
+                                img_url,
+                                mime_type_opt.as_deref().unwrap_or("image/*"),
+                            )?
+                        }
+                    } else {
+                        let base_path =
+                            Path::new(app_config.output_directory.as_deref().unwrap_or("."));
+                        let imgs_dir_name = article.metadata().title();
+
+                        if !base_path.join(imgs_dir_name).exists() {
+                            std::fs::create_dir(base_path.join(imgs_dir_name))?;
+                        }
+
+                        let imgs_dir_path = base_path.join(imgs_dir_name);
+                        update_img_urls(article, &imgs_dir_path)?;
+                    }
+
+                    let utf8_encoding =
+                        NodeRef::new_element(create_qualname("meta"), BTreeMap::new());
+                    if let Some(elem_node) = utf8_encoding.as_element() {
+                        let mut elem_attrs = elem_node.attributes.borrow_mut();
+                        elem_attrs.insert("charset", "UTF-8".into());
+                    }
+
+                    if let Ok(head_elem) = article.node_ref().select_first("head") {
+                        let head_elem_node = head_elem.as_node();
+                        head_elem_node.append(utf8_encoding);
+                    };
+
+                    insert_title_elem(article.node_ref(), article.metadata().title());
+                    insert_appendix(article.node_ref(), vec![(article.metadata(), &article.url)]);
+                    inline_css(article.node_ref(), app_config);
+
+                    article.node_ref().serialize(&mut out_file)?;
+                    Ok(())
+                };
+
+                if let Err(mut err) = export_article() {
+                    err.set_article_source(&article.url);
+                    errors.push(err);
+                }
+                debug!("Created {:?}", file_name);
+
+                bar.inc(1);
+                successful_articles_table.add_row(vec![article.metadata().title()]);
+            }
+            bar.finish_with_message("Generated HTML files\n");
+        }
+    }
+
+    if errors.is_empty() {
+        Ok(())
+    } else {
+        Err(errors)
+    }
+}
+
+fn create_qualname(name: &str) -> QualName {
+    QualName::new(
+        None,
+        Namespace::from("http://www.w3.org/1999/xhtml"),
+        LocalName::from(name),
+    )
+}
+
+/// Updates the src attribute of `<img>` elements with a base64 encoded string of the image data
+fn update_imgs_base64(
+    article: &Article,
+    img_url: &str,
+    mime_type: &str,
+) -> Result<(), std::io::Error> {
+    let temp_dir = std::env::temp_dir();
+    let img_path = temp_dir.join(img_url);
+    let img_bytes = std::fs::read(img_path)?;
+    let img_base64_str = format!("data:image:{};base64,{}", mime_type, encode(img_bytes));
+
+    let img_elems = article
+        .node_ref()
+        .select(&format!("img[src=\"{}\"]", img_url))
+        .unwrap();
+    for img_elem in img_elems {
+        let mut img_attr = img_elem.attributes.borrow_mut();
+        if let Some(src_attr) = img_attr.get_mut("src") {
+            *src_attr = img_base64_str.clone();
+        }
+    }
+    Ok(())
+}
+
+/// Updates the src attribute of `<img>` elements to the new `imgs_dir_path` and copies the image to the new file location
+fn update_img_urls(article: &Article, imgs_dir_path: &Path) -> Result<(), std::io::Error> {
+    let temp_dir = std::env::temp_dir();
+    for (img_url, _) in &article.img_urls {
+        let (from, to) = (temp_dir.join(img_url), imgs_dir_path.join(img_url));
+        info!("Copying {:?} to {:?}", from, to);
+        fs::copy(from, to)?;
+        let img_elems = article
+            .node_ref()
+            .select(&format!("img[src=\"{}\"]", img_url))
+            .unwrap();
+        for img_elem in img_elems {
+            let mut img_attr = img_elem.attributes.borrow_mut();
+            if let Some(src_attr) = img_attr.get_mut("src") {
+                *src_attr = imgs_dir_path.join(img_url).to_str().unwrap().into();
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Creates a `<title>` element in an HTML document with the value set to the article's title
+fn insert_title_elem(root_node: &NodeRef, title: &str) {
+    let title_content = NodeRef::new_text(title);
+    let title_elem = NodeRef::new_element(create_qualname("title"), BTreeMap::new());
+    title_elem.append(title_content);
+    match root_node.select_first("head") {
+        Ok(head_elem) => {
+            head_elem.as_node().append(title_elem);
+        }
+        Err(_) => {
+            debug!("{}", HEAD_ELEM_NOT_FOUND);
+            let html_elem = root_node.select_first("html").unwrap();
+            let head_elem = NodeRef::new_element(create_qualname("head"), BTreeMap::new());
+            head_elem.append(title_elem);
+            html_elem.as_node().prepend(head_elem);
+        }
+    }
+}
+
+/// Creates the appendix in an HTML document where article sources are added in a `<footer>` element
+fn insert_appendix(root_node: &NodeRef, article_links: Vec<(&MetaData, &str)>) {
+    let link_tags: String = article_links
+        .iter()
+        .map(|(meta_data, url)| {
+            let article_name = if !meta_data.title().is_empty() {
+                meta_data.title()
+            } else {
+                url
+            };
+            format!("<a href=\"{}\">{}</a><br></br>", url, article_name)
+        })
+        .collect();
+    let footer_inner_html = format!("<h2>Appendix</h2><h3>Article sources</h3>{}", link_tags);
+    let footer_elem =
+        kuchiki::parse_fragment(create_qualname("footer"), Vec::new()).one(footer_inner_html);
+    root_node.append(footer_elem);
+}
+
+/// Inlines the CSS stylesheets into the HTML article node
+fn inline_css(root_node: &NodeRef, app_config: &AppConfig) {
+    let body_stylesheet = include_str!("./assets/body.min.css");
+    let header_stylesheet = include_str!("./assets/headers.min.css");
+    let mut css_str = String::new();
+    match app_config.css_config {
+        cli::CSSConfig::NoHeaders => {
+            css_str.push_str(body_stylesheet);
+        }
+        cli::CSSConfig::All => {
+            css_str.push_str(body_stylesheet);
+            css_str.push_str(header_stylesheet);
+        }
+        cli::CSSConfig::None => {
+            return;
+        }
+    }
+    let css_html_str = format!("<style>{}</style>", css_str);
+    let style_container =
+        kuchiki::parse_fragment(create_qualname("div"), Vec::new()).one(css_html_str);
+    let style_elem = style_container.select_first("style").unwrap();
+    match root_node.select_first("head") {
+        Ok(head_elem) => {
+            head_elem.as_node().prepend(style_elem.as_node().to_owned());
+        }
+        Err(_) => {
+            debug!("{}", HEAD_ELEM_NOT_FOUND);
+            let html_elem = root_node.select_first("html").unwrap();
+            let head_elem = NodeRef::new_element(create_qualname("head"), BTreeMap::new());
+            head_elem.prepend(style_elem.as_node().to_owned());
+            html_elem.as_node().prepend(head_elem);
+        }
+    }
+
+    // Remove the <link> of the stylesheet since styles are now inlined
+    if let Ok(style_link_elem) = root_node.select_first("link[href=\"stylesheet.css\"]") {
+        style_link_elem.as_node().detach();
+    };
+}
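For reference, `update_imgs_base64` above rewrites each matching `<img>` src into a base64 data URI. A hypothetical minimal version of just the string-building step (shown with the standard `data:<mime>;base64,` shape; names are illustrative):

```rust
// Illustrative only: build the data URI that replaces an <img> src attribute.
fn to_data_uri(img_bytes: &[u8], mime_type: &str) -> String {
    format!("data:{};base64,{}", mime_type, base64::encode(img_bytes))
}

fn main() {
    let uri = to_data_uri(&[0x89, b'P', b'N', b'G'], "image/png");
    assert!(uri.starts_with("data:image/png;base64,"));
}
```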
src/http.rs (10 changes)
@@ -9,7 +9,7 @@ use url::Url;

 use crate::cli::AppConfig;
 use crate::errors::{ErrorKind, ImgError, PaperoniError};
-use crate::extractor::Extractor;
+use crate::extractor::Article;
 type HTMLResource = (String, String);

 pub fn download(
@@ -17,7 +17,7 @@ pub fn download(
     bar: &ProgressBar,
     partial_downloads: &mut Vec<PartialDownload>,
     errors: &mut Vec<PaperoniError>,
-) -> Vec<Extractor> {
+) -> Vec<Article> {
     task::block_on(async {
         let urls_iter = app_config.urls.iter().map(|url| fetch_html(url));
         let mut responses = stream::from_iter(urls_iter).buffered(app_config.max_conn);
@@ -26,7 +26,7 @@ pub fn download(
             match fetch_result {
                 Ok((url, html)) => {
                     debug!("Extracting {}", &url);
-                    let mut extractor = Extractor::from_html(&html, &url);
+                    let mut extractor = Article::from_html(&html, &url);
                     bar.set_message("Extracting...");
                     match extractor.extract_content() {
                         Ok(_) => {
@@ -185,7 +185,7 @@ async fn process_img_response<'a>(
 }

 pub async fn download_images(
-    extractor: &mut Extractor,
+    extractor: &mut Article,
     article_origin: &Url,
     bar: &ProgressBar,
 ) -> Result<(), Vec<ImgError>> {
@@ -237,7 +237,7 @@ pub async fn download_images(
     let replace_existing_img_src = |img_item: ImgItem| -> (String, Option<String>) {
         let (img_url, img_path, img_mime) = img_item;
         let img_ref = extractor
-            .article()
+            .node_ref()
             .select_first(&format!("img[src='{}']", img_url))
             .expect("Image node does not exist");
         let mut img_node = img_ref.attributes.borrow_mut();
src/logs.rs
@@ -11,7 +11,7 @@ use crate::errors::PaperoniError;

 pub fn display_summary(
     initial_article_count: usize,
-    succesful_articles_table: Table,
+    successful_articles_table: Table,
     partial_downloads: Vec<PartialDownload>,
     errors: Vec<PaperoniError>,
 ) {
@@ -31,7 +31,7 @@ pub fn display_summary(
     );

     if successfully_downloaded_count > 0 {
-        println!("{}", succesful_articles_table);
+        println!("{}", successful_articles_table);
     }

     if partial_downloads_count > 0 {
src/main.rs (32 changes)
@@ -3,6 +3,7 @@ extern crate lazy_static;

 use std::process::exit;

+use colored::Colorize;
 use comfy_table::presets::{UTF8_FULL, UTF8_HORIZONTAL_BORDERS_ONLY};
 use comfy_table::{ContentArrangement, Table};
 use http::download;
@@ -12,6 +13,7 @@ mod cli;
 mod epub;
 mod errors;
 mod extractor;
+mod html;
 /// This module is responsible for async HTTP calls for downloading
 /// the HTML content and images
 mod http;
@@ -20,13 +22,14 @@ mod moz_readability;

 use cli::AppConfig;
 use epub::generate_epubs;
+use html::generate_html_exports;
 use logs::display_summary;

 fn main() {
     let app_config = match cli::AppConfig::init_with_cli() {
         Ok(app_config) => app_config,
         Err(err) => {
-            eprintln!("{}", err);
+            eprintln!("{}: {}", "ERROR".bold().bright_red(), err);
             exit(1);
         }
     };
@@ -64,22 +67,33 @@ fn run(app_config: AppConfig) {
     let articles = download(&app_config, &bar, &mut partial_downloads, &mut errors);
     bar.finish_with_message("Downloaded articles");

-    let mut succesful_articles_table = Table::new();
-    succesful_articles_table
-        .load_preset(UTF8_FULL)
+    let mut successful_articles_table = Table::new();
+    successful_articles_table
+        .load_preset(UTF8_HORIZONTAL_BORDERS_ONLY)
         .set_content_arrangement(ContentArrangement::Dynamic);
-    match generate_epubs(articles, &app_config, &mut succesful_articles_table) {
-        Ok(_) => (),
-        Err(gen_epub_errors) => {
-            errors.extend(gen_epub_errors);
+
+    match app_config.export_type {
+        cli::ExportType::EPUB => {
+            match generate_epubs(articles, &app_config, &mut successful_articles_table) {
+                Ok(_) => (),
+                Err(gen_epub_errors) => {
+                    errors.extend(gen_epub_errors);
+                }
+            };
         }
-    };
+        cli::ExportType::HTML => {
+            match generate_html_exports(articles, &app_config, &mut successful_articles_table) {
+                Ok(_) => (),
+                Err(gen_html_errors) => errors.extend(gen_html_errors),
+            }
+        }
+    }

     let has_errors = !errors.is_empty() || !partial_downloads.is_empty();
     display_summary(
         app_config.urls.len(),
-        succesful_articles_table,
+        successful_articles_table,
         partial_downloads,
         errors,
     );