2021-02-06 09:59:03 +00:00
|
|
|
use std::fs::File;
|
|
|
|
|
|
|
|
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
|
|
|
|
|
|
|
|
use crate::extractor::{self, Extractor};
|
|
|
|
|
2021-02-11 10:51:21 +00:00
|
|
|
pub fn generate_epubs(articles: Vec<Extractor>, merged: Option<&String>) {
|
|
|
|
match merged {
|
|
|
|
Some(name) => {
|
|
|
|
let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
|
|
|
epub.inline_toc();
|
|
|
|
epub = articles
|
|
|
|
.iter()
|
|
|
|
.enumerate()
|
|
|
|
.fold(epub, |mut epub, (idx, article)| {
|
|
|
|
let mut html_buf = Vec::new();
|
|
|
|
extractor::serialize_to_xhtml(article.article().unwrap(), &mut html_buf)
|
|
|
|
.expect("Unable to serialize to xhtml");
|
|
|
|
let html_str = std::str::from_utf8(&html_buf).unwrap();
|
|
|
|
epub.metadata("title", replace_metadata_value(name))
|
|
|
|
.unwrap();
|
|
|
|
let section_name = article.metadata().title();
|
|
|
|
epub.add_content(
|
|
|
|
EpubContent::new(format!("article_{}.xhtml", idx), html_str.as_bytes())
|
|
|
|
.title(replace_metadata_value(section_name)),
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
|
|
|
|
article.img_urls.iter().for_each(|img| {
|
|
|
|
let mut file_path = std::env::temp_dir();
|
|
|
|
file_path.push(&img.0);
|
|
|
|
|
|
|
|
let img_buf = File::open(&file_path).expect("Can't read file");
|
|
|
|
epub.add_resource(
|
|
|
|
file_path.file_name().unwrap(),
|
|
|
|
img_buf,
|
|
|
|
img.1.as_ref().unwrap(),
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
});
|
|
|
|
epub
|
|
|
|
});
|
|
|
|
let mut out_file = File::create(&name).unwrap();
|
|
|
|
epub.generate(&mut out_file).unwrap();
|
|
|
|
println!("Created {:?}", name);
|
|
|
|
}
|
|
|
|
None => {
|
|
|
|
for article in articles {
|
|
|
|
let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
|
|
|
let file_name = format!(
|
|
|
|
"{}.epub",
|
|
|
|
article
|
|
|
|
.metadata()
|
|
|
|
.title()
|
|
|
|
.replace("/", " ")
|
|
|
|
.replace("\\", " ")
|
|
|
|
);
|
|
|
|
let mut out_file = File::create(&file_name).unwrap();
|
|
|
|
let mut html_buf = Vec::new();
|
|
|
|
extractor::serialize_to_xhtml(article.article().unwrap(), &mut html_buf)
|
|
|
|
.expect("Unable to serialize to xhtml");
|
|
|
|
let html_str = std::str::from_utf8(&html_buf).unwrap();
|
|
|
|
if let Some(author) = article.metadata().byline() {
|
|
|
|
epub.metadata("author", replace_metadata_value(author))
|
|
|
|
.unwrap();
|
|
|
|
}
|
|
|
|
epub.metadata("title", replace_metadata_value(article.metadata().title()))
|
|
|
|
.unwrap();
|
|
|
|
epub.add_content(EpubContent::new("index.xhtml", html_str.as_bytes()))
|
|
|
|
.unwrap();
|
|
|
|
for img in article.img_urls {
|
|
|
|
let mut file_path = std::env::temp_dir();
|
|
|
|
file_path.push(&img.0);
|
2021-02-06 09:59:03 +00:00
|
|
|
|
2021-02-11 10:51:21 +00:00
|
|
|
let img_buf = File::open(&file_path).expect("Can't read file");
|
|
|
|
epub.add_resource(file_path.file_name().unwrap(), img_buf, img.1.unwrap())
|
|
|
|
.unwrap();
|
|
|
|
}
|
|
|
|
epub.generate(&mut out_file).unwrap();
|
|
|
|
println!("Created {:?}", file_name);
|
|
|
|
}
|
|
|
|
}
|
2021-02-06 09:59:03 +00:00
|
|
|
}
|
|
|
|
}
|
2021-02-06 10:53:04 +00:00
|
|
|
|
|
|
|
/// Replaces characters that have to be escaped before adding to the epub's metadata
///
/// The characters `&`, `<` and `>` are replaced with the entity references `&amp;`, `&lt;`
/// and `&gt;` respectively. All other characters are copied unchanged.
fn replace_metadata_value(value: &str) -> String {
    // Single pass over the input: each character is escaped at most once, so the `&`
    // introduced by an entity reference can never be escaped a second time.
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}

#[cfg(test)]
mod test {
    use super::replace_metadata_value;

    #[test]
    fn test_replace_metadata_value() {
        let mut value = "Lorem ipsum";
        assert_eq!(replace_metadata_value(value), "Lorem ipsum");
        value = "Memory safe > memory unsafe";
        assert_eq!(
            replace_metadata_value(value),
            "Memory safe &gt; memory unsafe"
        );
        value = "Author Name <author@mail.example>";
        assert_eq!(
            replace_metadata_value(value),
            "Author Name &lt;author@mail.example&gt;"
        );
    }
}
|