commit dc16f9f52b
6 changed files with 345 additions and 80 deletions

src/cli.rs (153 changed lines)
@@ -9,13 +9,14 @@ type Error = crate::errors::CliError<AppConfigBuilderError>;
 
 const DEFAULT_MAX_CONN: usize = 8;
 
-#[derive(derive_builder::Builder)]
+#[derive(derive_builder::Builder, Debug)]
 pub struct AppConfig {
     /// Article urls
     pub urls: Vec<String>,
     pub max_conn: usize,
     /// Path to file of multiple articles into a single article
     pub merged: Option<String>,
+    // TODO: Change type to Path
     pub output_directory: Option<String>,
     pub log_level: LogLevel,
     pub can_disable_progress_bar: bool,
@@ -95,7 +96,7 @@ impl<'a> TryFrom<ArgMatches<'a>> for AppConfig {
                 None => DEFAULT_MAX_CONN,
             })
             .merged(arg_matches.value_of("output-name").map(|name| {
-                let file_ext = format!(".{}", arg_matches.value_of("export").unwrap());
+                let file_ext = format!(".{}", arg_matches.value_of("export").unwrap_or("epub"));
                 if name.ends_with(&file_ext) {
                     name.to_owned()
                 } else {
@@ -132,10 +133,11 @@ impl<'a> TryFrom<ArgMatches<'a>> for AppConfig {
             )
             .output_directory(
                 arg_matches
-                    .value_of("output_directory")
+                    .value_of("output-directory")
                     .map(|output_directory| {
                         let path = Path::new(output_directory);
                         if !path.exists() {
+                            // TODO: Create the directory
                             Err(Error::OutputDirectoryNotExists)
                         } else if !path.is_dir() {
                             Err(Error::WrongOutputDirectory)
@@ -157,14 +159,24 @@ impl<'a> TryFrom<ArgMatches<'a>> for AppConfig {
                 },
             )
             .export_type({
-                let export_type = arg_matches.value_of("export").unwrap();
+                let export_type = arg_matches.value_of("export").unwrap_or("epub");
                 if export_type == "html" {
                     ExportType::HTML
                 } else {
                     ExportType::EPUB
                 }
             })
-            .is_inlining_images(arg_matches.is_present("inline-images"))
+            .is_inlining_images(
+                (if arg_matches.is_present("inline-images") {
+                    if arg_matches.value_of("export") == Some("html") {
+                        Ok(true)
+                    } else {
+                        Err(Error::WrongExportInliningImages)
+                    }
+                } else {
+                    Ok(false)
+                })?,
+            )
             .try_init()
     }
 }
@@ -190,3 +202,134 @@ pub enum ExportType {
     HTML,
     EPUB,
 }
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_clap_config_errors() {
+        let yaml_config = load_yaml!("cli_config.yml");
+        let app = App::from_yaml(yaml_config);
+
+        // It returns Ok when only a url is passed
+        let result = app
+            .clone()
+            .get_matches_from_safe(vec!["paperoni", "http://example.org"]);
+        assert!(result.is_ok());
+
+        // It returns an error when no args are passed
+        let result = app.clone().get_matches_from_safe(vec!["paperoni"]);
+        assert!(result.is_err());
+        assert_eq!(
+            clap::ErrorKind::MissingArgumentOrSubcommand,
+            result.unwrap_err().kind
+        );
+
+        // It returns an error when both output-dir and merge are used
+        let result = app.clone().get_matches_from_safe(vec![
+            "paperoni",
+            "http://example.org",
+            "--merge",
+            "foo",
+            "--output-dir",
+            "~",
+        ]);
+        assert!(result.is_err());
+        assert_eq!(clap::ErrorKind::ArgumentConflict, result.unwrap_err().kind);
+
+        // It returns an error when both no-css and no-header-css are used
+        let result = app.clone().get_matches_from_safe(vec![
+            "paperoni",
+            "http://example.org",
+            "--no-css",
+            "--no-header-css",
+        ]);
+        assert!(result.is_err());
+        assert_eq!(clap::ErrorKind::ArgumentConflict, result.unwrap_err().kind);
+
+        // It returns an error when inline-toc is used without merge
+        let result = app.clone().get_matches_from_safe(vec![
+            "paperoni",
+            "http://example.org",
+            "--inline-toc",
+        ]);
+        assert!(result.is_err());
+        assert_eq!(
+            clap::ErrorKind::MissingRequiredArgument,
+            result.unwrap_err().kind
+        );
+
+        // It returns an error when inline-images is used without export
+        let result = app.clone().get_matches_from_safe(vec![
+            "paperoni",
+            "http://example.org",
+            "--inline-images",
+        ]);
+        assert!(result.is_err());
+        assert_eq!(
+            clap::ErrorKind::MissingRequiredArgument,
+            result.unwrap_err().kind
+        );
+
+        // It returns an error when export is given an invalid value
+        let result = app.clone().get_matches_from_safe(vec![
+            "paperoni",
+            "http://example.org",
+            "--export",
+            "pdf",
+        ]);
+        assert!(result.is_err());
+        assert_eq!(clap::ErrorKind::InvalidValue, result.unwrap_err().kind);
+
+        // It returns an error when max-conn is given a negative number
+        let result = app.clone().get_matches_from_safe(vec![
+            "paperoni",
+            "http://example.org",
+            "--max-conn",
+            "-1",
+        ]);
+        assert!(result.is_err());
+        // The cli is configured not to accept negative numbers, so "-1" is parsed as an unknown flag named "1"
+        assert_eq!(clap::ErrorKind::UnknownArgument, result.unwrap_err().kind);
+    }
+
+    #[test]
+    fn test_init_with_cli() {
+        let yaml_config = load_yaml!("cli_config.yml");
+        let app = App::from_yaml(yaml_config);
+
+        // It returns an error when the urls passed are whitespace
+        let matches = app.clone().get_matches_from(vec!["paperoni", ""]);
+        let app_config = AppConfig::try_from(matches);
+        assert!(app_config.is_err());
+        assert_eq!(Error::NoUrls, app_config.unwrap_err());
+
+        // It returns an error when inline-toc is used when exporting to HTML
+        let matches = app.clone().get_matches_from(vec![
+            "paperoni",
+            "http://example.org",
+            "--merge",
+            "foo",
+            "--export",
+            "html",
+            "--inline-toc",
+        ]);
+        let app_config = AppConfig::try_from(matches);
+        assert!(app_config.is_err());
+        assert_eq!(Error::WrongExportInliningToC, app_config.unwrap_err());
+        // It returns an Ok when inline-toc is used when exporting to epub
+        let matches = app.clone().get_matches_from(vec![
+            "paperoni",
+            "http://example.org",
+            "--merge",
+            "foo",
+            "--export",
+            "epub",
+            "--inline-toc",
+        ]);
+        assert!(AppConfig::try_from(matches).is_ok());
+
+        // It returns an error when inline-images is used when exporting to epub
+    }
+}
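
The second test above ends with a bare comment for the epub + --inline-images case. A sketch of how that case might be covered, following the pattern of the surrounding assertions (hypothetical test, not part of this commit; the function name is made up, and it assumes the builder surfaces Error::WrongExportInliningImages the same way the --inline-toc cases surface Error::WrongExportInliningToC):

    // Hypothetical addition to the mod test above (relies on the same use super::*; scope)
    #[test]
    fn test_inline_images_with_epub_export_errors() {
        let yaml_config = load_yaml!("cli_config.yml");
        let app = App::from_yaml(yaml_config);
        let matches = app.get_matches_from(vec![
            "paperoni",
            "http://example.org",
            "--export",
            "epub",
            "--inline-images",
        ]);
        let app_config = AppConfig::try_from(matches);
        assert!(app_config.is_err());
        assert_eq!(Error::WrongExportInliningImages, app_config.unwrap_err());
    }
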
@@ -12,7 +12,7 @@ args:
       long: file
       help: Input file containing links
       takes_value: true
-  - output_directory:
+  - output-directory:
       short: o
       long: output-dir
       help: Directory to store output epub documents
@@ -70,7 +70,6 @@ args:
       possible_values: [html, epub]
       value_name: type
       takes_value: true
-      default_value: epub
   - inline-images:
       long: inline-images
       help: Inlines the article images when exporting to HTML using base64. Pass --help to learn more.
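
Dropping default_value: epub from cli_config.yml is what makes the unwrap() to unwrap_or("epub") changes in src/cli.rs necessary: without a default, value_of("export") is None whenever the flag is omitted. A minimal sketch of that behaviour, assuming clap 2.x with the yaml feature as this crate already uses (the snippet is illustrative and assumes it sits next to cli_config.yml, like cli.rs does):

    use clap::{load_yaml, App};

    fn main() {
        // load_yaml! resolves the path relative to this source file
        let yaml = load_yaml!("cli_config.yml");
        let matches =
            App::from_yaml(yaml).get_matches_from(vec!["paperoni", "http://example.org"]);
        // With the default removed, the value is absent unless --export is passed...
        assert_eq!(None, matches.value_of("export"));
        // ...so cli.rs now supplies the epub fallback itself.
        assert_eq!("epub", matches.value_of("export").unwrap_or("epub"));
    }
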
@@ -138,6 +138,14 @@ pub enum LogError {
     CreateLogDirectoryError(#[from] std::io::Error),
 }
 
+// dumb hack to allow for comparing errors in testing.
+// derive macros cannot be used because underlying errors like io::Error do not derive PartialEq
+impl PartialEq for LogError {
+    fn eq(&self, other: &Self) -> bool {
+        format!("{:?}", self) == format!("{:?}", other)
+    }
+}
+
 #[derive(Debug, Error)]
 pub enum CliError<BuilderError: Debug + Display> {
     #[error("Failed to open file with urls: {0}")]
@@ -156,6 +164,16 @@ pub enum CliError<BuilderError: Debug + Display> {
     OutputDirectoryNotExists,
     #[error("Unable to start logger!\n{0}")]
     LogError(#[from] LogError),
-    #[error("The --inline-toc can only be used exporting to epub")]
+    #[error("The --inline-toc flag can only be used when exporting to epub")]
     WrongExportInliningToC,
+    #[error("The --inline-images flag can only be used when exporting to html")]
+    WrongExportInliningImages,
+}
+
+// dumb hack to allow for comparing errors in testing.
+// derive macros cannot be used because underlying errors like io::Error do not derive PartialEq
+impl<T: Debug + Display> PartialEq for CliError<T> {
+    fn eq(&self, other: &Self) -> bool {
+        format!("{:?}", self) == format!("{:?}", other)
+    }
 }
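
These PartialEq impls in the errors module are what let the new cli tests write assert_eq!(Error::NoUrls, app_config.unwrap_err()). A standalone illustration of the same Debug-format trick on a made-up error type (hypothetical, not from the crate); the trade-off is that any two values whose Debug output happens to match compare equal:

    #[derive(Debug)]
    enum MyError {
        Io(std::io::Error),
        NoUrls,
    }

    // Equality defined as "the Debug representations match": enough for assert_eq!
    // in tests, even though io::Error itself does not implement PartialEq.
    impl PartialEq for MyError {
        fn eq(&self, other: &Self) -> bool {
            format!("{:?}", self) == format!("{:?}", other)
        }
    }

    fn main() {
        let a = MyError::Io(std::io::Error::new(std::io::ErrorKind::Other, "boom"));
        let b = MyError::Io(std::io::Error::new(std::io::ErrorKind::Other, "boom"));
        assert_eq!(a, b); // equal because their Debug output is identical
        assert_ne!(MyError::NoUrls, a);
    }
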
@@ -4,6 +4,7 @@ use kuchiki::{traits::*, NodeRef};
 use crate::errors::PaperoniError;
 use crate::moz_readability::{MetaData, Readability};
 
+/// A tuple of the url and an Option of the resource's MIME type
 pub type ResourceInfo = (String, Option<String>);
 
 pub struct Article {

src/html.rs (239 changed lines)
@@ -12,7 +12,7 @@ use kuchiki::{traits::*, NodeRef};
 use log::{debug, error, info};
 
 use crate::{
-    cli::{self, AppConfig},
+    cli::{self, AppConfig, CSSConfig},
     errors::PaperoniError,
     extractor::Article,
     moz_readability::MetaData,
@@ -91,38 +91,33 @@ pub fn generate_html_exports(
                 *id_attr = format!("readability-page-{}", idx);
             }
 
-            for (img_url, mime_type_opt) in &article.img_urls {
-                if app_config.is_inlining_images {
-                    info!("Inlining images for {}", title);
-                    let result = update_imgs_base64(
-                        article,
-                        img_url,
-                        mime_type_opt.as_deref().unwrap_or("image/*"),
-                    );
+            if app_config.is_inlining_images {
+                info!("Inlining images for {}", title);
+                let result = update_imgs_base64(article);
+
                 if let Err(e) = result {
                     let mut err: PaperoniError = e.into();
                     err.set_article_source(title);
                     error!("Unable to copy images to imgs dir for {}", title);
                     errors.push(err);
                 }
 
                 info!("Completed inlining images for {}", title);
+            } else {
+                info!("Copying images to imgs dir for {}", title);
+                let result = update_img_urls(article, &imgs_dir_path).map_err(|e| {
+                    let mut err: PaperoniError = e.into();
+                    err.set_article_source(title);
+                    err
+                });
+                if let Err(e) = result {
+                    error!("Unable to copy images to imgs dir for {}", title);
+                    errors.push(e);
                 } else {
-                    info!("Copying images to imgs dir for {}", title);
-                    let result = update_img_urls(article, &imgs_dir_path).map_err(|e| {
-                        let mut err: PaperoniError = e.into();
-                        err.set_article_source(title);
-                        err
-                    });
-                    if let Err(e) = result {
-                        error!("Unable to copy images to imgs dir for {}", title);
-                        errors.push(e);
-                    } else {
-                        info!("Successfully copied images to imgs dir for {}", title);
-                    }
+                    info!("Successfully copied images to imgs dir for {}", title);
                 }
             }
 
             bar.inc(1);
             successful_articles_table.add_row(vec![title]);
             body_elem.as_node().append(article_elem.as_node().clone());
@@ -137,7 +132,8 @@ pub fn generate_html_exports(
                 .map(|article| (article.metadata(), article.url.as_str()))
                 .collect(),
         );
-        inline_css(&base_html_elem, app_config);
+        inline_css(&base_html_elem, &app_config.css_config);
+        remove_existing_stylesheet_link(&base_html_elem);
 
         info!("Added title, footer and inlined styles for {}", name);
 
@@ -199,13 +195,7 @@ pub fn generate_html_exports(
             let mut out_file = File::create(&file_name)?;
 
             if app_config.is_inlining_images {
-                for (img_url, mime_type_opt) in &article.img_urls {
-                    update_imgs_base64(
-                        article,
-                        img_url,
-                        mime_type_opt.as_deref().unwrap_or("image/*"),
-                    )?
-                }
+                update_imgs_base64(article)?;
             } else {
                 let base_path =
                     Path::new(app_config.output_directory.as_deref().unwrap_or("."));
@@ -233,7 +223,8 @@ pub fn generate_html_exports(
 
             insert_title_elem(article.node_ref(), article.metadata().title());
             insert_appendix(article.node_ref(), vec![(article.metadata(), &article.url)]);
-            inline_css(article.node_ref(), app_config);
+            inline_css(article.node_ref(), &app_config.css_config);
+            remove_existing_stylesheet_link(article.node_ref());
 
             article.node_ref().serialize(&mut out_file)?;
             Ok(())
@@ -268,24 +259,26 @@ fn create_qualname(name: &str) -> QualName {
 }
 
 /// Updates the src attribute of `<img>` elements with a base64 encoded string of the image data
-fn update_imgs_base64(
-    article: &Article,
-    img_url: &str,
-    mime_type: &str,
-) -> Result<(), std::io::Error> {
+fn update_imgs_base64(article: &Article) -> Result<(), std::io::Error> {
     let temp_dir = std::env::temp_dir();
-    let img_path = temp_dir.join(img_url);
-    let img_bytes = std::fs::read(img_path)?;
-    let img_base64_str = format!("data:image:{};base64,{}", mime_type, encode(img_bytes));
+    for (img_url, mime_type) in &article.img_urls {
+        let img_path = temp_dir.join(img_url);
+        let img_bytes = std::fs::read(img_path)?;
+        let img_base64_str = format!(
+            "data:image:{};base64,{}",
+            mime_type.as_deref().unwrap_or("image/*"),
+            encode(img_bytes)
+        );
 
         let img_elems = article
             .node_ref()
             .select(&format!("img[src=\"{}\"]", img_url))
             .unwrap();
         for img_elem in img_elems {
             let mut img_attr = img_elem.attributes.borrow_mut();
             if let Some(src_attr) = img_attr.get_mut("src") {
                 *src_attr = img_base64_str.clone();
+            }
         }
     }
     Ok(())
@@ -344,18 +337,23 @@ fn insert_appendix(root_node: &NodeRef, article_links: Vec<(&MetaData, &str)>) {
             format!("<a href=\"{}\">{}</a><br></br>", url, article_name)
         })
         .collect();
-    let footer_inner_html = format!("<h2>Appendix</h2><h2>Article sources</h3>{}", link_tags);
-    let footer_elem =
-        kuchiki::parse_fragment(create_qualname("footer"), Vec::new()).one(footer_inner_html);
-    root_node.append(footer_elem);
+    let footer_inner_html = format!(
+        "<footer><h2>Appendix</h2><h3>Article sources</h3>{}</footer>",
+        link_tags
+    );
+    let footer_container =
+        kuchiki::parse_fragment(create_qualname("div"), Vec::new()).one(footer_inner_html);
+    let footer_elem = footer_container.select_first("footer").unwrap();
+
+    root_node.append(footer_elem.as_node().clone());
 }
 
 /// Inlines the CSS stylesheets into the HTML article node
-fn inline_css(root_node: &NodeRef, app_config: &AppConfig) {
+fn inline_css(root_node: &NodeRef, css_config: &CSSConfig) {
     let body_stylesheet = include_str!("./assets/body.min.css");
     let header_stylesheet = include_str!("./assets/headers.min.css");
     let mut css_str = String::new();
-    match app_config.css_config {
+    match css_config {
         cli::CSSConfig::NoHeaders => {
             css_str.push_str(body_stylesheet);
         }
@@ -371,21 +369,118 @@ fn inline_css(root_node: &NodeRef, app_config: &AppConfig) {
     let style_container =
         kuchiki::parse_fragment(create_qualname("div"), Vec::new()).one(css_html_str);
     let style_elem = style_container.select_first("style").unwrap();
-    match root_node.select_first("head") {
-        Ok(head_elem) => {
-            head_elem.as_node().prepend(style_elem.as_node().to_owned());
-        }
-        Err(_) => {
-            debug!("{}", HEAD_ELEM_NOT_FOUND);
-            let html_elem = root_node.select_first("html").unwrap();
-            let head_elem = NodeRef::new_element(create_qualname("head"), BTreeMap::new());
-            head_elem.prepend(style_elem.as_node().to_owned());
-            html_elem.as_node().prepend(head_elem);
-        }
-    }
+    let head_elem = root_node.select_first("head").expect(HEAD_ELEM_NOT_FOUND);
+    head_elem.as_node().prepend(style_elem.as_node().to_owned());
+}
 
-    // Remove the <link> of the stylesheet since styles are now inlined
+/// Removes the <link> of the stylesheet. This is used when inlining styles
+fn remove_existing_stylesheet_link(root_node: &NodeRef) {
     if let Ok(style_link_elem) = root_node.select_first("link[href=\"stylesheet.css\"]") {
         style_link_elem.as_node().detach();
     };
 }
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    #[test]
+    fn test_insert_title_elem() {
+        let title = "Sample title";
+        let html_str = r#"<html><head><meta charset="UTF-8"/></head><body></body></html>"#;
+        let doc = kuchiki::parse_html().one(html_str);
+        assert_eq!(0, doc.select("title").unwrap().count());
+
+        insert_title_elem(&doc, title);
+        assert_eq!(1, doc.select("title").unwrap().count());
+        assert_eq!(title, doc.select_first("title").unwrap().text_contents());
+    }
+
+    #[test]
+    fn test_create_qualname() {
+        let name = "div";
+        assert_eq!(
+            create_qualname(name),
+            QualName::new(
+                None,
+                Namespace::from("http://www.w3.org/1999/xhtml"),
+                LocalName::from(name)
+            )
+        );
+    }
+
+    #[test]
+    fn test_inline_css() {
+        let html_str = r#"<html>
+        <head><meta charset="UTF-8"/></head>
+        <body>
+        <p>Lorem ipsum sample text goes here.</p>
+        </body>
+        </html>"#;
+        let doc = kuchiki::parse_html().one(html_str);
+        let body_stylesheet = include_str!("./assets/body.min.css");
+        let header_stylesheet = include_str!("./assets/headers.min.css");
+        assert_eq!(0, doc.select("style").unwrap().count());
+
+        inline_css(&doc, &CSSConfig::None);
+        assert_eq!(0, doc.select("style").unwrap().count());
+
+        inline_css(&doc, &CSSConfig::NoHeaders);
+        assert_eq!(1, doc.select("style").unwrap().count());
+        let style_elem = doc.select_first("style").unwrap();
+        assert_eq!(body_stylesheet, style_elem.text_contents());
+
+        let doc = kuchiki::parse_html().one(html_str);
+        inline_css(&doc, &CSSConfig::All);
+        assert_eq!(1, doc.select("style").unwrap().count());
+        let style_elem = doc.select_first("style").unwrap();
+        assert_eq!(
+            format!("{}{}", body_stylesheet, header_stylesheet),
+            style_elem.text_contents()
+        );
+    }
+
+    #[test]
+    fn test_remove_existing_stylesheet_link() {
+        let html_str = r#"<html>
+        <head><link href="stylesheet.css"></link></head>
+        <body><p>Lorem ipsum sample text goes here.</p></body></html>"#;
+        let doc = kuchiki::parse_html().one(html_str);
+        assert_eq!(1, doc.select("link").unwrap().count());
+        remove_existing_stylesheet_link(&doc);
+        assert_eq!(0, doc.select("link").unwrap().count());
+    }
+
+    #[test]
+    fn test_insert_appendix() {
+        let html_str = r#"<html>
+        <head><meta charset="UTF-8"/></head>
+        <body>
+        <p>Lorem ipsum sample text goes here.</p>
+        </body>
+        </html>"#;
+        let doc = kuchiki::parse_html().one(html_str);
+        let meta_data = MetaData::new();
+
+        assert_eq!(0, doc.select("footer").unwrap().count());
+
+        insert_appendix(&doc, vec![(&meta_data, "http://example.org")]);
+
+        assert_eq!(1, doc.select("footer").unwrap().count());
+        assert_eq!(1, doc.select("footer > h2").unwrap().count());
+        assert_eq!(
+            "Appendix",
+            doc.select_first("footer > h2").unwrap().text_contents()
+        );
+        assert_eq!(1, doc.select("footer > h3").unwrap().count());
+        assert_eq!(
+            "Article sources",
+            doc.select_first("footer > h3").unwrap().text_contents()
+        );
+        assert_eq!(1, doc.select("a").unwrap().count());
+
+        let anchor_elem = doc.select_first("a").unwrap();
+        assert_eq!("http://example.org", anchor_elem.text_contents());
+        let anchor_attrs = anchor_elem.attributes.borrow();
+        assert_eq!(Some("http://example.org"), anchor_attrs.get("href"));
+    }
+}
@@ -150,6 +150,15 @@ async fn process_img_response<'a>(
     let img_mime = img_response
         .content_type()
         .map(|mime| mime.essence().to_string());
+    if let Some(mime_str) = &img_mime {
+        if !mime_str.starts_with("image/") {
+            return Err(ErrorKind::HTTPError(format!(
+                "Invalid image MIME type: {} for {}",
+                mime_str, url
+            ))
+            .into());
+        }
+    }
     let img_ext = match img_response
         .content_type()
         .map(|mime| map_mime_subtype_to_ext(mime.subtype()).to_string())
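
The new guard rejects responses whose content type is not an image before an extension is derived from the MIME subtype. A tiny standalone sketch of the same check on plain strings (hypothetical helper, not from the crate):

    fn is_image_mime(essence: &str) -> bool {
        // `essence` is the bare "type/subtype" form without parameters, e.g. "image/png"
        essence.starts_with("image/")
    }

    fn main() {
        assert!(is_image_mime("image/png"));
        assert!(!is_image_mime("text/html"));
    }
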