2021-02-06 09:59:03 +00:00
|
|
|
use std::fs::File;
|
|
|
|
|
2021-04-20 12:02:56 +01:00
|
|
|
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
|
2021-02-06 09:59:03 +00:00
|
|
|
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
|
2021-04-17 15:27:38 +01:00
|
|
|
use indicatif::{ProgressBar, ProgressStyle};
|
2021-04-24 11:54:47 +01:00
|
|
|
use log::{debug, info};
|
2021-02-06 09:59:03 +00:00
|
|
|
|
2021-04-17 10:04:06 +01:00
|
|
|
use crate::{
|
2021-04-29 17:58:37 +01:00
|
|
|
cli::AppConfig,
|
2021-04-17 10:04:06 +01:00
|
|
|
errors::PaperoniError,
|
|
|
|
extractor::{self, Extractor},
|
|
|
|
};
|
2021-02-06 09:59:03 +00:00
|
|
|
|
2021-04-17 10:04:06 +01:00
|
|
|
pub fn generate_epubs(
|
|
|
|
articles: Vec<Extractor>,
|
2021-04-29 17:58:37 +01:00
|
|
|
app_config: &AppConfig,
|
2021-04-24 07:00:18 +01:00
|
|
|
successful_articles_table: &mut Table,
|
2021-04-20 19:09:38 +01:00
|
|
|
) -> Result<(), Vec<PaperoniError>> {
|
2021-04-29 17:58:37 +01:00
|
|
|
let bar = if app_config.can_disable_progress_bar() {
|
|
|
|
ProgressBar::hidden()
|
|
|
|
} else {
|
|
|
|
let enabled_bar = ProgressBar::new(articles.len() as u64);
|
|
|
|
let style = ProgressStyle::default_bar().template(
|
2021-04-17 15:27:38 +01:00
|
|
|
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} epub {pos}/{len:7} {msg:.green}",
|
|
|
|
);
|
2021-04-29 17:58:37 +01:00
|
|
|
enabled_bar.set_style(style);
|
|
|
|
if !articles.is_empty() {
|
|
|
|
enabled_bar.set_message("Generating epubs");
|
|
|
|
}
|
|
|
|
enabled_bar
|
|
|
|
};
|
2021-04-20 19:09:38 +01:00
|
|
|
|
|
|
|
let mut errors: Vec<PaperoniError> = Vec::new();
|
|
|
|
|
2021-04-29 17:58:37 +01:00
|
|
|
match app_config.merged() {
|
2021-02-11 10:51:21 +00:00
|
|
|
Some(name) => {
|
2021-04-24 07:00:18 +01:00
|
|
|
successful_articles_table.set_header(vec![Cell::new("Table of Contents")
|
2021-04-20 12:02:56 +01:00
|
|
|
.add_attribute(Attribute::Bold)
|
|
|
|
.set_alignment(CellAlignment::Center)
|
|
|
|
.fg(Color::Green)]);
|
2021-04-20 19:09:38 +01:00
|
|
|
|
|
|
|
let mut epub = match EpubBuilder::new(match ZipLibrary::new() {
|
|
|
|
Ok(zip_library) => zip_library,
|
|
|
|
Err(err) => {
|
|
|
|
let mut paperoni_err: PaperoniError = err.into();
|
|
|
|
paperoni_err.set_article_source(name);
|
|
|
|
errors.push(paperoni_err);
|
|
|
|
return Err(errors);
|
|
|
|
}
|
|
|
|
}) {
|
|
|
|
Ok(epub) => epub,
|
|
|
|
Err(err) => {
|
|
|
|
let mut paperoni_err: PaperoniError = err.into();
|
|
|
|
paperoni_err.set_article_source(name);
|
|
|
|
errors.push(paperoni_err);
|
|
|
|
return Err(errors);
|
|
|
|
}
|
|
|
|
};
|
2021-04-24 11:54:47 +01:00
|
|
|
debug!("Creating {:?}", name);
|
2021-02-11 10:51:21 +00:00
|
|
|
epub.inline_toc();
|
2021-04-20 19:09:38 +01:00
|
|
|
articles
|
2021-02-11 10:51:21 +00:00
|
|
|
.iter()
|
|
|
|
.enumerate()
|
2021-04-20 19:09:38 +01:00
|
|
|
.fold(&mut epub, |epub, (idx, article)| {
|
|
|
|
let mut article_result = || -> Result<(), PaperoniError> {
|
|
|
|
let mut html_buf = Vec::new();
|
2021-04-21 17:07:08 +01:00
|
|
|
extractor::serialize_to_xhtml(article.article(), &mut html_buf)?;
|
2021-04-20 19:09:38 +01:00
|
|
|
let html_str = std::str::from_utf8(&html_buf)?;
|
|
|
|
epub.metadata("title", replace_metadata_value(name))?;
|
|
|
|
let section_name = article.metadata().title();
|
|
|
|
epub.add_content(
|
|
|
|
EpubContent::new(format!("article_{}.xhtml", idx), html_str.as_bytes())
|
|
|
|
.title(replace_metadata_value(section_name)),
|
|
|
|
)?;
|
2021-04-24 11:54:47 +01:00
|
|
|
info!("Adding images for {:?}", name);
|
2021-04-20 19:09:38 +01:00
|
|
|
article.img_urls.iter().for_each(|img| {
|
|
|
|
// TODO: Add error handling and return errors as a vec
|
|
|
|
let mut file_path = std::env::temp_dir();
|
|
|
|
file_path.push(&img.0);
|
|
|
|
|
|
|
|
let img_buf = File::open(&file_path).expect("Can't read file");
|
|
|
|
epub.add_resource(
|
|
|
|
file_path.file_name().unwrap(),
|
|
|
|
img_buf,
|
|
|
|
img.1.as_ref().unwrap(),
|
|
|
|
)
|
|
|
|
.unwrap();
|
|
|
|
});
|
2021-04-24 11:54:47 +01:00
|
|
|
info!("Added images for {:?}", name);
|
2021-04-20 19:09:38 +01:00
|
|
|
Ok(())
|
|
|
|
};
|
|
|
|
if let Err(mut error) = article_result() {
|
|
|
|
error.set_article_source(&article.url);
|
|
|
|
errors.push(error);
|
|
|
|
}
|
2021-04-20 12:02:56 +01:00
|
|
|
bar.inc(1);
|
2021-04-24 07:00:18 +01:00
|
|
|
successful_articles_table.add_row(vec![article.metadata().title()]);
|
2021-02-11 10:51:21 +00:00
|
|
|
epub
|
|
|
|
});
|
2021-04-27 18:34:26 +01:00
|
|
|
let appendix = generate_appendix(articles.iter().collect());
|
|
|
|
if let Err(err) = epub.add_content(
|
|
|
|
EpubContent::new("appendix.xhtml", appendix.as_bytes())
|
|
|
|
.title(replace_metadata_value("Article Sources")),
|
|
|
|
) {
|
|
|
|
let mut paperoni_err: PaperoniError = err.into();
|
|
|
|
paperoni_err.set_article_source(name);
|
|
|
|
errors.push(paperoni_err);
|
|
|
|
return Err(errors);
|
|
|
|
}
|
|
|
|
|
2021-02-11 10:51:21 +00:00
|
|
|
let mut out_file = File::create(&name).unwrap();
|
2021-04-20 19:09:38 +01:00
|
|
|
match epub.generate(&mut out_file) {
|
|
|
|
Ok(_) => (),
|
|
|
|
Err(err) => {
|
|
|
|
let mut paperoni_err: PaperoniError = err.into();
|
|
|
|
paperoni_err.set_article_source(name);
|
|
|
|
errors.push(paperoni_err);
|
|
|
|
return Err(errors);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-17 15:27:38 +01:00
|
|
|
bar.finish_with_message("Generated epub\n");
|
2021-04-24 11:54:47 +01:00
|
|
|
debug!("Created {:?}", name);
|
2021-02-11 10:51:21 +00:00
|
|
|
println!("Created {:?}", name);
|
|
|
|
}
|
|
|
|
None => {
|
2021-04-24 07:00:18 +01:00
|
|
|
successful_articles_table
|
2021-04-20 12:02:56 +01:00
|
|
|
.set_header(vec![Cell::new("Downloaded articles")
|
|
|
|
.add_attribute(Attribute::Bold)
|
|
|
|
.set_alignment(CellAlignment::Center)
|
|
|
|
.fg(Color::Green)])
|
|
|
|
.set_content_arrangement(ContentArrangement::Dynamic);
|
|
|
|
|
2021-04-24 07:00:18 +01:00
|
|
|
for article in &articles {
|
2021-04-20 19:09:38 +01:00
|
|
|
let mut result = || -> Result<(), PaperoniError> {
|
|
|
|
let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;
|
|
|
|
let file_name = format!(
|
|
|
|
"{}.epub",
|
|
|
|
article
|
|
|
|
.metadata()
|
|
|
|
.title()
|
|
|
|
.replace("/", " ")
|
|
|
|
.replace("\\", " ")
|
|
|
|
);
|
2021-04-24 11:54:47 +01:00
|
|
|
debug!("Creating {:?}", file_name);
|
2021-04-20 19:09:38 +01:00
|
|
|
let mut out_file = File::create(&file_name).unwrap();
|
|
|
|
let mut html_buf = Vec::new();
|
2021-04-21 17:07:08 +01:00
|
|
|
extractor::serialize_to_xhtml(article.article(), &mut html_buf)
|
2021-04-20 19:09:38 +01:00
|
|
|
.expect("Unable to serialize to xhtml");
|
|
|
|
let html_str = std::str::from_utf8(&html_buf).unwrap();
|
|
|
|
if let Some(author) = article.metadata().byline() {
|
|
|
|
epub.metadata("author", replace_metadata_value(author))?;
|
|
|
|
}
|
|
|
|
epub.metadata("title", replace_metadata_value(article.metadata().title()))?;
|
|
|
|
epub.add_content(EpubContent::new("index.xhtml", html_str.as_bytes()))?;
|
|
|
|
for img in &article.img_urls {
|
|
|
|
let mut file_path = std::env::temp_dir();
|
|
|
|
file_path.push(&img.0);
|
|
|
|
|
|
|
|
let img_buf = File::open(&file_path).expect("Can't read file");
|
|
|
|
epub.add_resource(
|
|
|
|
file_path.file_name().unwrap(),
|
|
|
|
img_buf,
|
|
|
|
img.1.as_ref().unwrap(),
|
|
|
|
)?;
|
|
|
|
}
|
2021-04-27 18:34:26 +01:00
|
|
|
let appendix = generate_appendix(vec![&article]);
|
|
|
|
epub.add_content(
|
|
|
|
EpubContent::new("appendix.xhtml", appendix.as_bytes())
|
|
|
|
.title(replace_metadata_value("Article Source")),
|
|
|
|
)?;
|
2021-04-20 19:09:38 +01:00
|
|
|
epub.generate(&mut out_file)?;
|
|
|
|
bar.inc(1);
|
2021-04-20 12:02:56 +01:00
|
|
|
|
2021-04-24 07:00:18 +01:00
|
|
|
successful_articles_table.add_row(vec![article.metadata().title()]);
|
2021-04-20 12:02:56 +01:00
|
|
|
|
2021-04-24 11:54:47 +01:00
|
|
|
debug!("Created {:?}", file_name);
|
2021-04-20 19:09:38 +01:00
|
|
|
Ok(())
|
|
|
|
};
|
|
|
|
if let Err(mut error) = result() {
|
|
|
|
error.set_article_source(&article.url);
|
|
|
|
errors.push(error);
|
|
|
|
}
|
2021-02-11 10:51:21 +00:00
|
|
|
}
|
2021-04-17 15:27:38 +01:00
|
|
|
bar.finish_with_message("Generated epubs\n");
|
2021-02-11 10:51:21 +00:00
|
|
|
}
|
2021-02-06 09:59:03 +00:00
|
|
|
}
|
2021-04-24 07:00:18 +01:00
|
|
|
|
2021-04-20 19:09:38 +01:00
|
|
|
if errors.is_empty() {
|
|
|
|
Ok(())
|
|
|
|
} else {
|
|
|
|
Err(errors)
|
|
|
|
}
|
2021-02-06 09:59:03 +00:00
|
|
|
}
|
2021-02-06 10:53:04 +00:00
|
|
|
|
|
|
|
/// Escapes the characters that are reserved in XML so that `value` can be
/// embedded in the epub's metadata and XHTML content.
///
/// `&` becomes `&amp;`, `<` becomes `&lt;` and `>` becomes `&gt;`. All other
/// characters are copied unchanged. The input is scanned once, so ampersands
/// that belong to an inserted entity are never escaped a second time.
fn replace_metadata_value(value: &str) -> String {
    // The output is at least as long as the input; reserve that much up front.
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
|
|
|
|
|
2021-04-27 18:34:26 +01:00
|
|
|
//TODO: The type signature of the argument should change as it requires that merged articles create an entirely new Vec of references
|
|
|
|
fn generate_appendix(articles: Vec<&Extractor>) -> String {
|
|
|
|
let link_tags: String = articles
|
|
|
|
.iter()
|
|
|
|
.map(|article| {
|
|
|
|
let article_name = if !article.metadata().title().is_empty() {
|
|
|
|
article.metadata().title()
|
|
|
|
} else {
|
|
|
|
&article.url
|
|
|
|
};
|
|
|
|
format!(
|
|
|
|
"<a href=\"{}\">{}</a><br></br>",
|
|
|
|
replace_metadata_value(&article.url),
|
|
|
|
replace_metadata_value(article_name)
|
|
|
|
)
|
|
|
|
})
|
|
|
|
.collect();
|
|
|
|
let template = format!(
|
|
|
|
r#"<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
|
|
|
|
<head>
|
|
|
|
</head>
|
|
|
|
<body>
|
|
|
|
<h2>Appendix</h2><h3>Article sources</h3>
|
|
|
|
{}
|
|
|
|
</body>
|
|
|
|
</html>"#,
|
|
|
|
link_tags
|
|
|
|
);
|
|
|
|
template
|
|
|
|
}
|
|
|
|
|
2021-02-06 10:53:04 +00:00
|
|
|
#[cfg(test)]
mod test {
    use super::replace_metadata_value;

    /// Checks that text without reserved characters is returned unchanged and
    /// that `&`, `<` and `>` come back as their XML entity references.
    #[test]
    fn test_replace_metadata_value() {
        let mut value = "Lorem ipsum";
        assert_eq!(replace_metadata_value(value), "Lorem ipsum");
        value = "Memory safe > memory unsafe";
        assert_eq!(
            replace_metadata_value(value),
            "Memory safe &gt; memory unsafe"
        );
        value = "Author Name <author@mail.example>";
        assert_eq!(
            replace_metadata_value(value),
            "Author Name &lt;author@mail.example&gt;"
        );
        value = "Tom & Jerry";
        assert_eq!(replace_metadata_value(value), "Tom &amp; Jerry");
    }
}
|