diff --git a/Cargo.lock b/Cargo.lock
index 8466dbf..2c8d164 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1283,6 +1283,7 @@ dependencies = [
  "md5",
  "regex",
  "surf",
+ "thiserror",
  "url",
 ]
 
@@ -1960,18 +1961,18 @@ checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
 
 [[package]]
 name = "thiserror"
-version = "1.0.22"
+version = "1.0.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e9ae34b84616eedaaf1e9dd6026dbe00dcafa92aa0c8077cb69df1fcfe5e53e"
+checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.22"
+version = "1.0.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ba20f23e85b10754cd195504aebf6a27e2e6cbe28c17778a0c930724628dd56"
+checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/Cargo.toml b/Cargo.toml
index 05660ed..451bf38 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,4 +22,5 @@ lazy_static = "1.4.0"
 md5 = "0.7.0"
 regex = "1.4.2"
 surf = "2.1.0"
+thiserror = "1.0.24"
 url = "2.2.0"
diff --git a/src/epub.rs b/src/epub.rs
index e6e0376..a7a8cbc 100644
--- a/src/epub.rs
+++ b/src/epub.rs
@@ -2,12 +2,18 @@ use std::fs::File;
 
 use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
 
-use crate::extractor::{self, Extractor};
+use crate::{
+    errors::PaperoniError,
+    extractor::{self, Extractor},
+};
 
-pub fn generate_epubs(articles: Vec<Extractor>, merged: Option<&String>) {
+pub fn generate_epubs(
+    articles: Vec<Extractor>,
+    merged: Option<&String>,
+) -> Result<(), PaperoniError> {
     match merged {
         Some(name) => {
-            let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
+            let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;
             epub.inline_toc();
             epub = articles
                 .iter()
@@ -41,12 +47,12 @@ pub fn generate_epubs(articles: Vec<Extractor>, merged: Option<&String>) {
                     epub
                 });
             let mut out_file = File::create(&name).unwrap();
-            epub.generate(&mut out_file).unwrap();
+            epub.generate(&mut out_file)?;
             println!("Created {:?}", name);
         }
         None => {
             for article in articles {
-                let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
+                let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;
                 let file_name = format!(
                     "{}.epub",
                     article
@@ -61,26 +67,23 @@ pub fn generate_epubs(articles: Vec<Extractor>, merged: Option<&String>) {
                     .expect("Unable to serialize to xhtml");
                 let html_str = std::str::from_utf8(&html_buf).unwrap();
                 if let Some(author) = article.metadata().byline() {
-                    epub.metadata("author", replace_metadata_value(author))
-                        .unwrap();
+                    epub.metadata("author", replace_metadata_value(author))?;
                 }
-                epub.metadata("title", replace_metadata_value(article.metadata().title()))
-                    .unwrap();
-                epub.add_content(EpubContent::new("index.xhtml", html_str.as_bytes()))
-                    .unwrap();
+                epub.metadata("title", replace_metadata_value(article.metadata().title()))?;
+                epub.add_content(EpubContent::new("index.xhtml", html_str.as_bytes()))?;
                 for img in article.img_urls {
                     let mut file_path = std::env::temp_dir();
                     file_path.push(&img.0);
 
                     let img_buf = File::open(&file_path).expect("Can't read file");
-                    epub.add_resource(file_path.file_name().unwrap(), img_buf, img.1.unwrap())
-                        .unwrap();
+                    epub.add_resource(file_path.file_name().unwrap(), img_buf, img.1.unwrap())?;
                 }
-                epub.generate(&mut out_file).unwrap();
+                epub.generate(&mut out_file)?;
                 println!("Created {:?}", file_name);
             }
         }
     }
+    Ok(())
 }
 
 /// Replaces characters that have to be escaped before adding to the epub's metadata
diff --git a/src/errors.rs b/src/errors.rs
new file mode 100644
index 0000000..f0b3d9c
--- /dev/null
+++ b/src/errors.rs
@@ -0,0 +1,61 @@
+use thiserror::Error;
+
+#[derive(Error, Debug)]
+pub enum ErrorKind {
+    #[error("[EpubError]: {0}")]
+    EpubError(String),
+    #[error("[HTTPError]: {0}")]
+    HTTPError(String),
+    #[error("[IOError]: {0}")]
+    IOError(String),
+}
+
+#[derive(Error, Debug)]
+#[error("{kind}")]
+pub struct PaperoniError {
+    article_link: Option<String>,
+    kind: ErrorKind,
+}
+
+impl PaperoniError {
+    pub fn with_kind(kind: ErrorKind) -> Self {
+        PaperoniError {
+            article_link: None,
+            kind,
+        }
+    }
+
+    pub fn set_article_link(&mut self, article_link: String) {
+        self.article_link = Some(article_link);
+    }
+}
+
+impl From<ErrorKind> for PaperoniError {
+    fn from(kind: ErrorKind) -> Self {
+        PaperoniError::with_kind(kind)
+    }
+}
+
+impl From<epub_builder::Error> for PaperoniError {
+    fn from(err: epub_builder::Error) -> Self {
+        PaperoniError::with_kind(ErrorKind::EpubError(err.description().to_owned()))
+    }
+}
+
+impl From<surf::Error> for PaperoniError {
+    fn from(err: surf::Error) -> Self {
+        PaperoniError::with_kind(ErrorKind::HTTPError(err.to_string()))
+    }
+}
+
+impl From<url::ParseError> for PaperoniError {
+    fn from(err: url::ParseError) -> Self {
+        PaperoniError::with_kind(ErrorKind::HTTPError(err.to_string()))
+    }
+}
+
+impl From<std::io::Error> for PaperoniError {
+    fn from(err: std::io::Error) -> Self {
+        PaperoniError::with_kind(ErrorKind::IOError(err.to_string()))
+    }
+}
diff --git a/src/extractor.rs b/src/extractor.rs
index 0fcc5e8..64b9a2a 100644
--- a/src/extractor.rs
+++ b/src/extractor.rs
@@ -2,6 +2,7 @@ use std::collections::HashMap;
 
 use kuchiki::{traits::*, NodeRef};
 
+use crate::errors::PaperoniError;
 use crate::moz_readability::{MetaData, Readability};
 
 pub type ResourceInfo = (String, Option<String>);
@@ -75,7 +76,7 @@ impl Extractor {
 pub fn serialize_to_xhtml<W: std::io::Write>(
     node_ref: &NodeRef,
     mut w: &mut W,
-) -> Result<(), Box<dyn std::error::Error>> {
+) -> Result<(), PaperoniError> {
     let mut escape_map = HashMap::new();
     escape_map.insert("<", "&lt;");
     escape_map.insert(">", "&gt;");
diff --git a/src/http.rs b/src/http.rs
index faf9428..9ff7ef8 100644
--- a/src/http.rs
+++ b/src/http.rs
@@ -3,13 +3,11 @@ use async_std::{fs::File, stream};
 use futures::StreamExt;
 use url::Url;
 
-use crate::extractor::Extractor;
+use crate::{errors::ErrorKind, errors::PaperoniError, extractor::Extractor};
 
 type HTMLResource = (String, String);
 
-pub async fn fetch_url(
-    url: &str,
-) -> Result<HTMLResource, Box<dyn std::error::Error + Send + Sync>> {
+pub async fn fetch_url(url: &str) -> Result<HTMLResource, PaperoniError> {
     let client = surf::Client::new();
     println!("Fetching...");
 
@@ -37,26 +35,28 @@
                 if mime.essence() == "text/html" {
                     return Ok((url.to_string(), res.body_string().await?));
                 } else {
-                    return Err(format!(
+                    let msg = format!(
                         "Invalid HTTP response. Received {} instead of text/html",
                         mime.essence()
-                    )
-                    .into());
+                    );
+
+                    return Err(ErrorKind::HTTPError(msg).into());
                 }
             } else {
-                return Err("Unknown HTTP response".into());
+                return Err(ErrorKind::HTTPError("Unknown HTTP response".to_owned()).into());
            }
        } else {
-            return Err(format!("Request failed: HTTP {}", res.status()).into());
+            let msg = format!("Request failed: HTTP {}", res.status());
+            return Err(ErrorKind::HTTPError(msg).into());
        }
    }
-    Err("Unable to fetch HTML".into())
+    Err(ErrorKind::HTTPError("Unable to fetch HTML".to_owned()).into())
 }
 
 pub async fn download_images(
     extractor: &mut Extractor,
     article_origin: &Url,
-) -> async_std::io::Result<()> {
+) -> Result<(), Vec<PaperoniError>> {
     if extractor.img_urls.len() > 0 {
         println!("Downloading images...");
     }
@@ -71,39 +71,56 @@
             )
         })
         .map(|(url, req)| async move {
-            let mut img_response = req.await.expect("Unable to retrieve image");
-            let img_content: Vec<u8> = img_response.body_bytes().await.unwrap();
-            let img_mime = img_response
-                .content_type()
-                .map(|mime| mime.essence().to_string());
-            let img_ext = img_response
-                .content_type()
-                .map(|mime| map_mime_subtype_to_ext(mime.subtype()).to_string())
-                .unwrap();
+            match req.await {
+                Ok(mut img_response) => {
+                    // let mut img_response = req.await.expect("Unable to retrieve image");
+                    let img_content: Vec<u8> = match img_response.body_bytes().await {
+                        Ok(bytes) => bytes,
+                        Err(e) => return Err(e.into()),
+                    };
+                    let img_mime = img_response
+                        .content_type()
+                        .map(|mime| mime.essence().to_string());
+                    let img_ext = match img_response
+                        .content_type()
+                        .map(|mime| map_mime_subtype_to_ext(mime.subtype()).to_string())
+                    {
+                        Some(mime_str) => mime_str,
+                        None => {
+                            return Err(ErrorKind::HTTPError(
+                                "Image has no Content-Type".to_owned(),
+                            )
+                            .into())
+                        }
+                    };
 
-            let mut img_path = std::env::temp_dir();
-            img_path.push(format!("{}.{}", hash_url(&url), &img_ext));
-            let mut img_file = File::create(&img_path)
-                .await
-                .expect("Unable to create file");
-            img_file
-                .write_all(&img_content)
-                .await
-                .expect("Unable to save to file");
+                    let mut img_path = std::env::temp_dir();
+                    img_path.push(format!("{}.{}", hash_url(&url), &img_ext));
+                    let mut img_file = match File::create(&img_path).await {
+                        Ok(file) => file,
+                        Err(e) => return Err(e.into()),
+                    };
+                    match img_file.write_all(&img_content).await {
+                        Ok(_) => (),
+                        Err(e) => return Err(e.into()),
+                    }
 
-            (
-                url,
-                img_path
-                    .file_name()
-                    .map(|os_str_name| {
-                        os_str_name
-                            .to_str()
-                            .expect("Unable to get image file name")
-                            .to_string()
-                    })
-                    .unwrap(),
-                img_mime,
-            )
+                    Ok((
+                        url,
+                        img_path
+                            .file_name()
+                            .map(|os_str_name| {
+                                os_str_name
+                                    .to_str()
+                                    .expect("Unable to get image file name")
+                                    .to_string()
+                            })
+                            .unwrap(),
+                        img_mime,
+                    ))
+                }
+                Err(e) => Err(e.into()),
+            }
         });
 
     // A utility closure used when updating the value of an image source after downloading is successful
@@ -124,14 +141,24 @@
         (img_path, img_mime)
     };
 
-    extractor.img_urls = stream::from_iter(imgs_req_iter)
+    let imgs_req_iter = stream::from_iter(imgs_req_iter)
         .buffered(10)
-        .collect::<Vec<_>>()
-        .await
-        .into_iter()
-        .map(replace_existing_img_src)
-        .collect();
-    Ok(())
+        .collect::<Vec<Result<_, PaperoniError>>>()
+        .await;
+    let mut errors = Vec::new();
+    let mut replaced_imgs = Vec::new();
+    for img_req_result in imgs_req_iter {
+        match img_req_result {
+            Ok(img_req) => replaced_imgs.push(replace_existing_img_src(img_req)),
+            Err(e) => errors.push(e),
+        }
+    }
+    extractor.img_urls = replaced_imgs;
+    if errors.is_empty() {
+        Ok(())
+    } else {
+        Err(errors)
+    }
 }
 
 /// Handles getting the extension from a given MIME subtype.
diff --git a/src/main.rs b/src/main.rs
index 0467712..7ad2560 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -8,6 +8,7 @@ use url::Url;
 
 mod cli;
 mod epub;
+mod errors;
 mod extractor;
 /// This module is responsible for async HTTP calls for downloading
 /// the HTML content and images
@@ -41,9 +42,17 @@ fn download(app_config: AppConfig) {
 
             if extractor.article().is_some() {
                 extractor.extract_img_urls();
-                download_images(&mut extractor, &Url::parse(&url).unwrap())
-                    .await
-                    .expect("Unable to download images");
+
+                if let Err(img_errors) =
+                    download_images(&mut extractor, &Url::parse(&url).unwrap()).await
+                {
+                    eprintln!(
+                        "{} image{} failed to download for {}",
+                        img_errors.len(),
+                        if img_errors.len() > 1 { "s" } else { "" },
+                        url
+                    );
+                }
                 articles.push(extractor);
             }
         }
@@ -52,5 +61,8 @@
         }
         articles
     });
-    generate_epubs(articles, app_config.merged());
+    match generate_epubs(articles, app_config.merged()) {
+        Ok(_) => (),
+        Err(e) => eprintln!("{}", e),
+    };
 }
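
The `From` impls in the new `src/errors.rs` are what let the `?` operator replace the `.unwrap()` and `.expect()` calls throughout this patch: any `std::io::Error`, `surf::Error`, `url::ParseError`, or `epub_builder::Error` is converted into a `PaperoniError` at the call site. Below is a minimal, self-contained sketch of that mechanism, reduced to the IO case; `read_article` is a hypothetical helper used only for illustration and is not part of this changeset.

```rust
use thiserror::Error;

#[derive(Error, Debug)]
pub enum ErrorKind {
    #[error("[IOError]: {0}")]
    IOError(String),
}

// `#[error("{kind}")]` derives Display for the wrapper by
// delegating to the inner ErrorKind's Display impl.
#[derive(Error, Debug)]
#[error("{kind}")]
pub struct PaperoniError {
    article_link: Option<String>,
    kind: ErrorKind,
}

impl From<std::io::Error> for PaperoniError {
    fn from(err: std::io::Error) -> Self {
        PaperoniError {
            article_link: None,
            kind: ErrorKind::IOError(err.to_string()),
        }
    }
}

// Hypothetical helper: the `?` below invokes the From impl above,
// so an io::Error becomes a PaperoniError without explicit mapping.
fn read_article(path: &str) -> Result<String, PaperoniError> {
    Ok(std::fs::read_to_string(path)?)
}

fn main() {
    if let Err(e) = read_article("missing.html") {
        // Prints "[IOError]: ..." via the derived Display impl.
        eprintln!("{}", e);
    }
}
```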
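The other key change is in `download_images`: instead of panicking on the first failed request, it collects a `Vec<Result<_, PaperoniError>>` from the buffered stream and partitions it, so one bad image no longer aborts the whole batch. A reduced sketch of that collect-then-partition pattern, with plain integers standing in for image downloads (all names here are illustrative, not from the codebase):

```rust
use futures::{executor::block_on, stream, StreamExt};

fn main() {
    // Simulate a batch of concurrent downloads where one item fails.
    let results: Vec<Result<u32, String>> = block_on(
        stream::iter(vec![1u32, 2, 3])
            .map(|n| async move {
                if n == 2 {
                    Err(format!("download {} failed", n))
                } else {
                    Ok(n)
                }
            })
            // Run up to 10 futures concurrently, as download_images does.
            .buffered(10)
            .collect(),
    );

    // Partition into successes and errors instead of unwrapping.
    let mut oks = Vec::new();
    let mut errors = Vec::new();
    for r in results {
        match r {
            Ok(v) => oks.push(v),
            Err(e) => errors.push(e),
        }
    }
    assert_eq!(oks, vec![1, 3]);
    assert_eq!(errors.len(), 1);
}
```

Returning `Err(errors)` only when the error list is non-empty is what lets `main.rs` report a count of failed images while still pushing the article for EPUB generation.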