refactor: move the loop over article image URLs into update_imgs_base64

chore: add doc comment on ResourceInfo alias

fix: return an error when an image response has a non-image MIME type
This commit is contained in:
Kenneth Gitere 2021-07-28 09:10:22 +03:00
parent 0b19376f59
commit 07479afeac
3 changed files with 51 additions and 50 deletions

View file

@ -4,6 +4,7 @@ use kuchiki::{traits::*, NodeRef};
use crate::errors::PaperoniError; use crate::errors::PaperoniError;
use crate::moz_readability::{MetaData, Readability}; use crate::moz_readability::{MetaData, Readability};
/// A tuple of the url and an Option of the resource's MIME type
pub type ResourceInfo = (String, Option<String>); pub type ResourceInfo = (String, Option<String>);
pub struct Article { pub struct Article {

View file

@ -91,38 +91,33 @@ pub fn generate_html_exports(
*id_attr = format!("readability-page-{}", idx); *id_attr = format!("readability-page-{}", idx);
} }
for (img_url, mime_type_opt) in &article.img_urls { if app_config.is_inlining_images {
if app_config.is_inlining_images { info!("Inlining images for {}", title);
info!("Inlining images for {}", title); let result = update_imgs_base64(article);
let result = update_imgs_base64(
article,
img_url,
mime_type_opt.as_deref().unwrap_or("image/*"),
);
if let Err(e) = result { if let Err(e) = result {
let mut err: PaperoniError = e.into(); let mut err: PaperoniError = e.into();
err.set_article_source(title); err.set_article_source(title);
error!("Unable to copy images to imgs dir for {}", title); error!("Unable to copy images to imgs dir for {}", title);
errors.push(err); errors.push(err);
} }
info!("Completed inlining images for {}", title); info!("Completed inlining images for {}", title);
} else {
info!("Copying images to imgs dir for {}", title);
let result = update_img_urls(article, &imgs_dir_path).map_err(|e| {
let mut err: PaperoniError = e.into();
err.set_article_source(title);
err
});
if let Err(e) = result {
error!("Unable to copy images to imgs dir for {}", title);
errors.push(e);
} else { } else {
info!("Copying images to imgs dir for {}", title); info!("Successfully copied images to imgs dir for {}", title);
let result = update_img_urls(article, &imgs_dir_path).map_err(|e| {
let mut err: PaperoniError = e.into();
err.set_article_source(title);
err
});
if let Err(e) = result {
error!("Unable to copy images to imgs dir for {}", title);
errors.push(e);
} else {
info!("Successfully copied images to imgs dir for {}", title);
}
} }
} }
bar.inc(1); bar.inc(1);
successful_articles_table.add_row(vec![title]); successful_articles_table.add_row(vec![title]);
body_elem.as_node().append(article_elem.as_node().clone()); body_elem.as_node().append(article_elem.as_node().clone());
@ -200,13 +195,7 @@ pub fn generate_html_exports(
let mut out_file = File::create(&file_name)?; let mut out_file = File::create(&file_name)?;
if app_config.is_inlining_images { if app_config.is_inlining_images {
for (img_url, mime_type_opt) in &article.img_urls { update_imgs_base64(article)?;
update_imgs_base64(
article,
img_url,
mime_type_opt.as_deref().unwrap_or("image/*"),
)?
}
} else { } else {
let base_path = let base_path =
Path::new(app_config.output_directory.as_deref().unwrap_or(".")); Path::new(app_config.output_directory.as_deref().unwrap_or("."));
@ -270,24 +259,26 @@ fn create_qualname(name: &str) -> QualName {
} }
/// Updates the src attribute of `<img>` elements with a base64 encoded string of the image data /// Updates the src attribute of `<img>` elements with a base64 encoded string of the image data
fn update_imgs_base64( fn update_imgs_base64(article: &Article) -> Result<(), std::io::Error> {
article: &Article,
img_url: &str,
mime_type: &str,
) -> Result<(), std::io::Error> {
let temp_dir = std::env::temp_dir(); let temp_dir = std::env::temp_dir();
let img_path = temp_dir.join(img_url); for (img_url, mime_type) in &article.img_urls {
let img_bytes = std::fs::read(img_path)?; let img_path = temp_dir.join(img_url);
let img_base64_str = format!("data:image:{};base64,{}", mime_type, encode(img_bytes)); let img_bytes = std::fs::read(img_path)?;
let img_base64_str = format!(
"data:image:{};base64,{}",
mime_type.as_deref().unwrap_or("image/*"),
encode(img_bytes)
);
let img_elems = article let img_elems = article
.node_ref() .node_ref()
.select(&format!("img[src=\"{}\"]", img_url)) .select(&format!("img[src=\"{}\"]", img_url))
.unwrap(); .unwrap();
for img_elem in img_elems { for img_elem in img_elems {
let mut img_attr = img_elem.attributes.borrow_mut(); let mut img_attr = img_elem.attributes.borrow_mut();
if let Some(src_attr) = img_attr.get_mut("src") { if let Some(src_attr) = img_attr.get_mut("src") {
*src_attr = img_base64_str.clone(); *src_attr = img_base64_str.clone();
}
} }
} }
Ok(()) Ok(())

View file

@ -150,6 +150,15 @@ async fn process_img_response<'a>(
let img_mime = img_response let img_mime = img_response
.content_type() .content_type()
.map(|mime| mime.essence().to_string()); .map(|mime| mime.essence().to_string());
if let Some(mime_str) = &img_mime {
if !mime_str.starts_with("image/") {
return Err(ErrorKind::HTTPError(format!(
"Invalid image MIME type: {} for {}",
mime_str, url
))
.into());
}
}
let img_ext = match img_response let img_ext = match img_response
.content_type() .content_type()
.map(|mime| map_mime_subtype_to_ext(mime.subtype()).to_string()) .map(|mime| map_mime_subtype_to_ext(mime.subtype()).to_string())