http.rs: extract process_img_response function

This commit is contained in:
KOVACS Tamas 2021-05-08 21:30:00 +02:00
parent 474d97c6bd
commit 4581f07330

View file

@ -72,6 +72,53 @@ pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
}) })
} }
type ImgItem<'a> = (&'a str, String, Option<String>);
async fn process_img_response<'a>(
img_response: &mut surf::Response,
url: &'a str,
) -> Result<ImgItem<'a>, ImgError> {
let img_content: Vec<u8> = match img_response.body_bytes().await {
Ok(bytes) => bytes,
Err(e) => return Err(e.into()),
};
let img_mime = img_response
.content_type()
.map(|mime| mime.essence().to_string());
let img_ext = match img_response
.content_type()
.map(|mime| map_mime_subtype_to_ext(mime.subtype()).to_string())
{
Some(mime_str) => mime_str,
None => return Err(ErrorKind::HTTPError("Image has no Content-Type".to_owned()).into()),
};
let mut img_path = std::env::temp_dir();
img_path.push(format!("{}.{}", hash_url(url), &img_ext));
let mut img_file = match File::create(&img_path).await {
Ok(file) => file,
Err(e) => return Err(e.into()),
};
match img_file.write_all(&img_content).await {
Ok(_) => (),
Err(e) => return Err(e.into()),
}
Ok((
url,
img_path
.file_name()
.map(|os_str_name| {
os_str_name
.to_str()
.expect("Unable to get image file name")
.to_string()
})
.unwrap(),
img_mime,
))
}
pub async fn download_images( pub async fn download_images(
extractor: &mut Extractor, extractor: &mut Extractor,
article_origin: &Url, article_origin: &Url,
@ -102,53 +149,9 @@ pub async fn download_images(
bar.set_message(format!("Downloading images [{}/{}]", img_idx + 1, img_count).as_str()); bar.set_message(format!("Downloading images [{}/{}]", img_idx + 1, img_count).as_str());
match req.await { match req.await {
Ok(mut img_response) => { Ok(mut img_response) => {
let process_response = async { let process_response =
let img_content: Vec<u8> = match img_response.body_bytes().await { process_img_response(&mut img_response, url.as_ref()).await;
Ok(bytes) => bytes, process_response.map_err(|mut e: ImgError| {
Err(e) => return Err(e.into()),
};
let img_mime = img_response
.content_type()
.map(|mime| mime.essence().to_string());
let img_ext = match img_response
.content_type()
.map(|mime| map_mime_subtype_to_ext(mime.subtype()).to_string())
{
Some(mime_str) => mime_str,
None => {
return Err(ErrorKind::HTTPError(
"Image has no Content-Type".to_owned(),
)
.into())
}
};
let mut img_path = std::env::temp_dir();
img_path.push(format!("{}.{}", hash_url(&url), &img_ext));
let mut img_file = match File::create(&img_path).await {
Ok(file) => file,
Err(e) => return Err(e.into()),
};
match img_file.write_all(&img_content).await {
Ok(_) => (),
Err(e) => return Err(e.into()),
}
Ok((
url,
img_path
.file_name()
.map(|os_str_name| {
os_str_name
.to_str()
.expect("Unable to get image file name")
.to_string()
})
.unwrap(),
img_mime,
))
};
process_response.await.map_err(|mut e: ImgError| {
e.set_url(url); e.set_url(url);
e e
}) })
@ -162,20 +165,19 @@ pub async fn download_images(
}); });
// A utility closure used when update the value of an image source after downloading is successful // A utility closure used when update the value of an image source after downloading is successful
let replace_existing_img_src = let replace_existing_img_src = |img_item: ImgItem| -> (String, Option<String>) {
|img_item: (&String, String, Option<String>)| -> (String, Option<String>) { let (img_url, img_path, img_mime) = img_item;
let (img_url, img_path, img_mime) = img_item; let img_ref = extractor
let img_ref = extractor .article()
.article() .select_first(&format!("img[src='{}']", img_url))
.select_first(&format!("img[src='{}']", img_url)) .expect("Image node does not exist");
.expect("Image node does not exist"); let mut img_node = img_ref.attributes.borrow_mut();
let mut img_node = img_ref.attributes.borrow_mut(); *img_node.get_mut("src").unwrap() = img_path.clone();
*img_node.get_mut("src").unwrap() = img_path.clone(); // srcset is removed because readers such as Foliate then fail to display
// srcset is removed because readers such as Foliate then fail to display // the image already downloaded and stored in src
// the image already downloaded and stored in src img_node.remove("srcset");
img_node.remove("srcset"); (img_path, img_mime)
(img_path, img_mime) };
};
let imgs_req_iter = stream::from_iter(imgs_req_iter) let imgs_req_iter = stream::from_iter(imgs_req_iter)
.buffered(10) .buffered(10)