diff --git a/Cargo.lock b/Cargo.lock index 01c1426..e1d51de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1242,7 +1242,7 @@ dependencies = [ [[package]] name = "paperoni" -version = "0.2.0-alpha1" +version = "0.2.1-alpha1" dependencies = [ "async-std", "clap", diff --git a/Cargo.toml b/Cargo.toml index 7559efa..c4c64ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ description = "A web article downloader" homepage = "https://github.com/hipstermojo/paperoni" repository = "https://github.com/hipstermojo/paperoni" name = "paperoni" -version = "0.2.0-alpha1" +version = "0.2.1-alpha1" authors = ["Kenneth Gitere "] edition = "2018" license = "MIT" diff --git a/src/cli.rs b/src/cli.rs index f62b0c5..92b56f4 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -6,7 +6,7 @@ pub fn cli_init() -> App<'static, 'static> { AppSettings::ArgRequiredElseHelp, AppSettings::UnifiedHelpMessage, ]) - .version("0.1.0-alpha1") + .version("0.2.1-alpha1") .about( " Paperoni is an article downloader. diff --git a/src/extractor.rs b/src/extractor.rs index ea7066f..9294ae6 100644 --- a/src/extractor.rs +++ b/src/extractor.rs @@ -11,7 +11,7 @@ use crate::moz_readability::{MetaData, Readability}; pub type ResourceInfo = (String, Option); lazy_static! 
{ - static ref ESC_SEQ_REGEX: regex::Regex = regex::Regex::new(r"(&|<|>)").unwrap(); + static ref ESC_SEQ_REGEX: regex::Regex = regex::Regex::new(r#"(&|<|>|'|")"#).unwrap(); } pub struct Extractor { @@ -56,7 +56,7 @@ impl Extractor { for img_ref in content_ref.select("img").unwrap() { img_ref.as_node().as_element().map(|img_elem| { img_elem.attributes.borrow().get("src").map(|img_url| { - if !img_url.is_empty() { + if !(img_url.is_empty() || img_url.starts_with("data:image")) { self.img_urls.push((img_url.to_string(), None)) } }) @@ -75,7 +75,9 @@ impl Extractor { async_download_tasks.push(task::spawn(async move { let mut img_response = surf::Client::new() - .with(surf::middleware::Redirect::default()) + // The middleware has been temporarily commented out because it happens + // to affect downloading images when there is no redirecting + // .with(surf::middleware::Redirect::default()) .get(&abs_url) .await .expect("Unable to retrieve file"); @@ -185,6 +187,8 @@ pub fn serialize_to_xhtml( escape_map.insert("<", "&lt;"); escape_map.insert(">", "&gt;"); escape_map.insert("&", "&amp;"); + escape_map.insert("\"", "&quot;"); + escape_map.insert("'", "&apos;"); for edge in node_ref.traverse_inclusive() { match edge { kuchiki::iter::NodeEdge::Start(n) => match n.data() { @@ -248,6 +252,7 @@ mod test {

Some Lorem Ipsum text here

Observe this picture

Random image +