refactor: refactor update_imgs_base64
chore: add doc comment on ResourceInfo alias; fix: add error when an image response has an invalid MIME type
This commit is contained in:
parent
0b19376f59
commit
07479afeac
3 changed files with 51 additions and 50 deletions
|
@ -4,6 +4,7 @@ use kuchiki::{traits::*, NodeRef};
|
||||||
use crate::errors::PaperoniError;
|
use crate::errors::PaperoniError;
|
||||||
use crate::moz_readability::{MetaData, Readability};
|
use crate::moz_readability::{MetaData, Readability};
|
||||||
|
|
||||||
|
/// A tuple of the url and an Option of the resource's MIME type
|
||||||
pub type ResourceInfo = (String, Option<String>);
|
pub type ResourceInfo = (String, Option<String>);
|
||||||
|
|
||||||
pub struct Article {
|
pub struct Article {
|
||||||
|
|
91
src/html.rs
91
src/html.rs
|
@ -91,38 +91,33 @@ pub fn generate_html_exports(
|
||||||
*id_attr = format!("readability-page-{}", idx);
|
*id_attr = format!("readability-page-{}", idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (img_url, mime_type_opt) in &article.img_urls {
|
if app_config.is_inlining_images {
|
||||||
if app_config.is_inlining_images {
|
info!("Inlining images for {}", title);
|
||||||
info!("Inlining images for {}", title);
|
let result = update_imgs_base64(article);
|
||||||
let result = update_imgs_base64(
|
|
||||||
article,
|
|
||||||
img_url,
|
|
||||||
mime_type_opt.as_deref().unwrap_or("image/*"),
|
|
||||||
);
|
|
||||||
|
|
||||||
if let Err(e) = result {
|
if let Err(e) = result {
|
||||||
let mut err: PaperoniError = e.into();
|
let mut err: PaperoniError = e.into();
|
||||||
err.set_article_source(title);
|
err.set_article_source(title);
|
||||||
error!("Unable to copy images to imgs dir for {}", title);
|
error!("Unable to copy images to imgs dir for {}", title);
|
||||||
errors.push(err);
|
errors.push(err);
|
||||||
}
|
}
|
||||||
|
|
||||||
info!("Completed inlining images for {}", title);
|
info!("Completed inlining images for {}", title);
|
||||||
|
} else {
|
||||||
|
info!("Copying images to imgs dir for {}", title);
|
||||||
|
let result = update_img_urls(article, &imgs_dir_path).map_err(|e| {
|
||||||
|
let mut err: PaperoniError = e.into();
|
||||||
|
err.set_article_source(title);
|
||||||
|
err
|
||||||
|
});
|
||||||
|
if let Err(e) = result {
|
||||||
|
error!("Unable to copy images to imgs dir for {}", title);
|
||||||
|
errors.push(e);
|
||||||
} else {
|
} else {
|
||||||
info!("Copying images to imgs dir for {}", title);
|
info!("Successfully copied images to imgs dir for {}", title);
|
||||||
let result = update_img_urls(article, &imgs_dir_path).map_err(|e| {
|
|
||||||
let mut err: PaperoniError = e.into();
|
|
||||||
err.set_article_source(title);
|
|
||||||
err
|
|
||||||
});
|
|
||||||
if let Err(e) = result {
|
|
||||||
error!("Unable to copy images to imgs dir for {}", title);
|
|
||||||
errors.push(e);
|
|
||||||
} else {
|
|
||||||
info!("Successfully copied images to imgs dir for {}", title);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bar.inc(1);
|
bar.inc(1);
|
||||||
successful_articles_table.add_row(vec![title]);
|
successful_articles_table.add_row(vec![title]);
|
||||||
body_elem.as_node().append(article_elem.as_node().clone());
|
body_elem.as_node().append(article_elem.as_node().clone());
|
||||||
|
@ -200,13 +195,7 @@ pub fn generate_html_exports(
|
||||||
let mut out_file = File::create(&file_name)?;
|
let mut out_file = File::create(&file_name)?;
|
||||||
|
|
||||||
if app_config.is_inlining_images {
|
if app_config.is_inlining_images {
|
||||||
for (img_url, mime_type_opt) in &article.img_urls {
|
update_imgs_base64(article)?;
|
||||||
update_imgs_base64(
|
|
||||||
article,
|
|
||||||
img_url,
|
|
||||||
mime_type_opt.as_deref().unwrap_or("image/*"),
|
|
||||||
)?
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
let base_path =
|
let base_path =
|
||||||
Path::new(app_config.output_directory.as_deref().unwrap_or("."));
|
Path::new(app_config.output_directory.as_deref().unwrap_or("."));
|
||||||
|
@ -270,24 +259,26 @@ fn create_qualname(name: &str) -> QualName {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Updates the src attribute of `<img>` elements with a base64 encoded string of the image data
|
/// Updates the src attribute of `<img>` elements with a base64 encoded string of the image data
|
||||||
fn update_imgs_base64(
|
fn update_imgs_base64(article: &Article) -> Result<(), std::io::Error> {
|
||||||
article: &Article,
|
|
||||||
img_url: &str,
|
|
||||||
mime_type: &str,
|
|
||||||
) -> Result<(), std::io::Error> {
|
|
||||||
let temp_dir = std::env::temp_dir();
|
let temp_dir = std::env::temp_dir();
|
||||||
let img_path = temp_dir.join(img_url);
|
for (img_url, mime_type) in &article.img_urls {
|
||||||
let img_bytes = std::fs::read(img_path)?;
|
let img_path = temp_dir.join(img_url);
|
||||||
let img_base64_str = format!("data:image:{};base64,{}", mime_type, encode(img_bytes));
|
let img_bytes = std::fs::read(img_path)?;
|
||||||
|
let img_base64_str = format!(
|
||||||
|
"data:image:{};base64,{}",
|
||||||
|
mime_type.as_deref().unwrap_or("image/*"),
|
||||||
|
encode(img_bytes)
|
||||||
|
);
|
||||||
|
|
||||||
let img_elems = article
|
let img_elems = article
|
||||||
.node_ref()
|
.node_ref()
|
||||||
.select(&format!("img[src=\"{}\"]", img_url))
|
.select(&format!("img[src=\"{}\"]", img_url))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
for img_elem in img_elems {
|
for img_elem in img_elems {
|
||||||
let mut img_attr = img_elem.attributes.borrow_mut();
|
let mut img_attr = img_elem.attributes.borrow_mut();
|
||||||
if let Some(src_attr) = img_attr.get_mut("src") {
|
if let Some(src_attr) = img_attr.get_mut("src") {
|
||||||
*src_attr = img_base64_str.clone();
|
*src_attr = img_base64_str.clone();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
@ -150,6 +150,15 @@ async fn process_img_response<'a>(
|
||||||
let img_mime = img_response
|
let img_mime = img_response
|
||||||
.content_type()
|
.content_type()
|
||||||
.map(|mime| mime.essence().to_string());
|
.map(|mime| mime.essence().to_string());
|
||||||
|
if let Some(mime_str) = &img_mime {
|
||||||
|
if !mime_str.starts_with("image/") {
|
||||||
|
return Err(ErrorKind::HTTPError(format!(
|
||||||
|
"Invalid image MIME type: {} for {}",
|
||||||
|
mime_str, url
|
||||||
|
))
|
||||||
|
.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
let img_ext = match img_response
|
let img_ext = match img_response
|
||||||
.content_type()
|
.content_type()
|
||||||
.map(|mime| map_mime_subtype_to_ext(mime.subtype()).to_string())
|
.map(|mime| map_mime_subtype_to_ext(mime.subtype()).to_string())
|
||||||
|
|
Loading…
Reference in a new issue