Compare commits
3 commits
Author | SHA1 | Date | |
---|---|---|---|
|
abaa7d37df | ||
|
e777426c1b | ||
|
3bf0719c8e |
13 changed files with 888 additions and 864 deletions
|
@ -1,20 +0,0 @@
|
||||||
steps:
|
|
||||||
build:
|
|
||||||
when:
|
|
||||||
- event: cron
|
|
||||||
- event: push
|
|
||||||
branch: main
|
|
||||||
- event: pull_request
|
|
||||||
image: docker.io/rust:latest
|
|
||||||
commands:
|
|
||||||
- apt-get install libssl-dev
|
|
||||||
- cargo build
|
|
||||||
test:
|
|
||||||
when:
|
|
||||||
- event: cron
|
|
||||||
- event: push
|
|
||||||
branch: main
|
|
||||||
- event: pull_request
|
|
||||||
image: docker.io/rust:latest
|
|
||||||
commands:
|
|
||||||
- cargo test
|
|
1425
Cargo.lock
generated
1425
Cargo.lock
generated
File diff suppressed because it is too large
Load diff
35
Cargo.toml
35
Cargo.toml
|
@ -13,27 +13,26 @@ readme = "README.md"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
# atty = "0.2.14"
|
# atty = "0.2.14"
|
||||||
async-std = "1.12.0"
|
async-std = "1.10.0"
|
||||||
base64 = "0.22.0"
|
base64 = "0.13.0"
|
||||||
chrono = "0.4.38"
|
chrono = "0.4.19"
|
||||||
clap = { version = "2.34.0", features = ["yaml"] }
|
clap = { version = "2.34.0", features = ["yaml"] }
|
||||||
colored = "2.1.0"
|
colored = "2.0.0"
|
||||||
comfy-table = "7.1.1"
|
comfy-table = "3.0.0"
|
||||||
derive_builder = "0.20.0"
|
derive_builder = "0.10.2"
|
||||||
directories = "5.0.1"
|
directories = "3.0.2"
|
||||||
epub-builder = "0.7.4"
|
epub-builder = "0.4.10"
|
||||||
eyre = "0.6.12"
|
flexi_logger = "0.22.2"
|
||||||
flexi_logger = "0.29.0"
|
futures = "0.3.19"
|
||||||
futures = "0.3.30"
|
|
||||||
html5ever = "0.25.1"
|
html5ever = "0.25.1"
|
||||||
indicatif = "0.17.8"
|
indicatif = "0.16.2"
|
||||||
itertools = "0.13.0"
|
itertools = "0.10.3"
|
||||||
kuchiki = "0.8.1"
|
kuchiki = "0.8.1"
|
||||||
lazy_static = "1.4.0"
|
lazy_static = "1.4.0"
|
||||||
log = "0.4.21"
|
log = "0.4.14"
|
||||||
md5 = "0.7.0"
|
md5 = "0.7.0"
|
||||||
openssl-sys = "0.9.102"
|
regex = "1.5.4"
|
||||||
regex = "1.11.0"
|
serde = "1.0.136"
|
||||||
surf = "2.3.2"
|
surf = "2.3.2"
|
||||||
thiserror = "1.0.59"
|
thiserror = "1.0.30"
|
||||||
url = "2.5.0"
|
url = "2.2.2"
|
||||||
|
|
|
@ -1,10 +0,0 @@
|
||||||
{
|
|
||||||
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
|
|
||||||
"extends": ["config:recommended"],
|
|
||||||
"packageRules": [
|
|
||||||
{
|
|
||||||
"matchManagers": ["cargo"],
|
|
||||||
"rangeStrategy": "replace"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
1
rust-toolchain
Normal file
1
rust-toolchain
Normal file
|
@ -0,0 +1 @@
|
||||||
|
1.57.0
|
11
src/epub.rs
11
src/epub.rs
|
@ -30,11 +30,7 @@ pub fn generate_epubs(
|
||||||
let enabled_bar = ProgressBar::new(articles.len() as u64);
|
let enabled_bar = ProgressBar::new(articles.len() as u64);
|
||||||
let style = ProgressStyle::default_bar().template(
|
let style = ProgressStyle::default_bar().template(
|
||||||
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} epub {pos}/{len:7} {msg:.green}",
|
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} epub {pos}/{len:7} {msg:.green}",
|
||||||
).map_err(|e| {
|
);
|
||||||
let mut paperoni_err: PaperoniError = e.into();
|
|
||||||
paperoni_err.set_article_source("progress bar");
|
|
||||||
vec![paperoni_err]
|
|
||||||
})?;
|
|
||||||
enabled_bar.set_style(style);
|
enabled_bar.set_style(style);
|
||||||
if !articles.is_empty() {
|
if !articles.is_empty() {
|
||||||
enabled_bar.set_message("Generating epubs");
|
enabled_bar.set_message("Generating epubs");
|
||||||
|
@ -253,7 +249,10 @@ fn replace_escaped_characters(value: &str) -> String {
|
||||||
.replace(">", "&gt;")
|
.replace(">", "&gt;")
|
||||||
}
|
}
|
||||||
|
|
||||||
fn add_stylesheets(epub: &mut EpubBuilder<ZipLibrary>, app_config: &AppConfig) -> eyre::Result<()> {
|
fn add_stylesheets<T: epub_builder::Zip>(
|
||||||
|
epub: &mut EpubBuilder<T>,
|
||||||
|
app_config: &AppConfig,
|
||||||
|
) -> Result<(), epub_builder::Error> {
|
||||||
let body_stylesheet: &[u8] = include_bytes!("./assets/body.min.css");
|
let body_stylesheet: &[u8] = include_bytes!("./assets/body.min.css");
|
||||||
let header_stylesheet: &[u8] = include_bytes!("./assets/headers.min.css");
|
let header_stylesheet: &[u8] = include_bytes!("./assets/headers.min.css");
|
||||||
match app_config.css_config {
|
match app_config.css_config {
|
||||||
|
|
|
@ -15,8 +15,6 @@ pub enum ErrorKind {
|
||||||
UTF8Error(String),
|
UTF8Error(String),
|
||||||
#[error("[ReadabilityError]: {0}")]
|
#[error("[ReadabilityError]: {0}")]
|
||||||
ReadabilityError(String),
|
ReadabilityError(String),
|
||||||
#[error("[TemplateError]: {0}")]
|
|
||||||
TemplateError(String),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
|
@ -100,9 +98,9 @@ impl From<ErrorKind> for PaperoniError {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<eyre::Error> for PaperoniError {
|
impl From<epub_builder::Error> for PaperoniError {
|
||||||
fn from(err: eyre::Error) -> Self {
|
fn from(err: epub_builder::Error) -> Self {
|
||||||
PaperoniError::with_kind(ErrorKind::EpubError(err.to_string()))
|
PaperoniError::with_kind(ErrorKind::EpubError(err.description().to_owned()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -130,12 +128,6 @@ impl From<std::str::Utf8Error> for PaperoniError {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<indicatif::style::TemplateError> for PaperoniError {
|
|
||||||
fn from(err: indicatif::style::TemplateError) -> Self {
|
|
||||||
PaperoniError::with_kind(ErrorKind::TemplateError(err.to_string()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum LogError {
|
pub enum LogError {
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
|
|
|
@ -1,8 +1,11 @@
|
||||||
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
use html5ever::{LocalName, Namespace, QualName};
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use kuchiki::{traits::*, NodeRef};
|
use kuchiki::{traits::*, NodeRef};
|
||||||
|
|
||||||
use crate::errors::PaperoniError;
|
use crate::errors::PaperoniError;
|
||||||
use crate::moz_readability::{MetaData, Readability};
|
use crate::moz_readability::{MetaData, Readability, HTML_NS};
|
||||||
|
|
||||||
/// A tuple of the url and an Option of the resource's MIME type
|
/// A tuple of the url and an Option of the resource's MIME type
|
||||||
pub type ResourceInfo = (String, Option<String>);
|
pub type ResourceInfo = (String, Option<String>);
|
||||||
|
@ -29,6 +32,7 @@ impl Article {
|
||||||
/// the source of the content
|
/// the source of the content
|
||||||
pub fn extract_content(&mut self) -> Result<(), PaperoniError> {
|
pub fn extract_content(&mut self) -> Result<(), PaperoniError> {
|
||||||
self.readability.parse(&self.url)?;
|
self.readability.parse(&self.url)?;
|
||||||
|
self.reinsert_title_heading();
|
||||||
if let Some(article_node_ref) = &self.readability.article_node {
|
if let Some(article_node_ref) = &self.readability.article_node {
|
||||||
let template = r#"
|
let template = r#"
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
|
@ -74,6 +78,20 @@ impl Article {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn reinsert_title_heading(&mut self) {
|
||||||
|
if let Some(article_node_ref) = &self.readability.article_node {
|
||||||
|
if let Ok(article_root_ref) = article_node_ref.select_first("div#readability-page-1") {
|
||||||
|
let article_root_elem = article_root_ref.as_node();
|
||||||
|
let h1_elem = NodeRef::new_element(
|
||||||
|
QualName::new(None, Namespace::from(HTML_NS), LocalName::from("h1")),
|
||||||
|
BTreeMap::new(),
|
||||||
|
);
|
||||||
|
h1_elem.append(NodeRef::new_text(self.readability.metadata.title()));
|
||||||
|
article_root_elem.prepend(h1_elem);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn metadata(&self) -> &MetaData {
|
pub fn metadata(&self) -> &MetaData {
|
||||||
&self.readability.metadata
|
&self.readability.metadata
|
||||||
}
|
}
|
||||||
|
|
11
src/html.rs
11
src/html.rs
|
@ -4,8 +4,7 @@ use std::{
|
||||||
path::Path,
|
path::Path,
|
||||||
};
|
};
|
||||||
|
|
||||||
use base64::prelude::*;
|
use base64::encode;
|
||||||
|
|
||||||
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
|
use comfy_table::{Attribute, Cell, CellAlignment, Color, ContentArrangement, Table};
|
||||||
use html5ever::{LocalName, Namespace, QualName};
|
use html5ever::{LocalName, Namespace, QualName};
|
||||||
use indicatif::{ProgressBar, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressStyle};
|
||||||
|
@ -44,11 +43,7 @@ pub fn generate_html_exports(
|
||||||
let enabled_bar = ProgressBar::new(articles.len() as u64);
|
let enabled_bar = ProgressBar::new(articles.len() as u64);
|
||||||
let style = ProgressStyle::default_bar().template(
|
let style = ProgressStyle::default_bar().template(
|
||||||
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} html {pos}/{len:7} {msg:.green}",
|
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} html {pos}/{len:7} {msg:.green}",
|
||||||
).map_err(|e| {
|
);
|
||||||
let mut paperoni_err: PaperoniError = e.into();
|
|
||||||
paperoni_err.set_article_source("progress bar");
|
|
||||||
vec![paperoni_err]
|
|
||||||
})?;
|
|
||||||
enabled_bar.set_style(style);
|
enabled_bar.set_style(style);
|
||||||
if !articles.is_empty() {
|
if !articles.is_empty() {
|
||||||
enabled_bar.set_message("Generating html files");
|
enabled_bar.set_message("Generating html files");
|
||||||
|
@ -272,7 +267,7 @@ fn update_imgs_base64(article: &Article) -> Result<(), std::io::Error> {
|
||||||
let img_base64_str = format!(
|
let img_base64_str = format!(
|
||||||
"data:image:{};base64,{}",
|
"data:image:{};base64,{}",
|
||||||
mime_type.as_deref().unwrap_or("image/*"),
|
mime_type.as_deref().unwrap_or("image/*"),
|
||||||
BASE64_STANDARD.encode(img_bytes)
|
encode(img_bytes)
|
||||||
);
|
);
|
||||||
|
|
||||||
let img_elems = article
|
let img_elems = article
|
||||||
|
|
130
src/http.rs
130
src/http.rs
|
@ -5,6 +5,7 @@ use futures::StreamExt;
|
||||||
use indicatif::ProgressBar;
|
use indicatif::ProgressBar;
|
||||||
use log::warn;
|
use log::warn;
|
||||||
use log::{debug, info};
|
use log::{debug, info};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
use crate::cli::AppConfig;
|
use crate::cli::AppConfig;
|
||||||
|
@ -22,9 +23,54 @@ pub fn download(
|
||||||
let urls_iter = app_config.urls.iter().map(|url| fetch_html(url));
|
let urls_iter = app_config.urls.iter().map(|url| fetch_html(url));
|
||||||
let mut responses = stream::from_iter(urls_iter).buffered(app_config.max_conn);
|
let mut responses = stream::from_iter(urls_iter).buffered(app_config.max_conn);
|
||||||
let mut articles = Vec::new();
|
let mut articles = Vec::new();
|
||||||
|
// Collect all urls that couldn't extract here
|
||||||
|
// let mut retry_with_paperteer: Vec<String> = Vec::new();
|
||||||
while let Some(fetch_result) = responses.next().await {
|
while let Some(fetch_result) = responses.next().await {
|
||||||
match fetch_result {
|
match fetch_result {
|
||||||
Ok((url, html)) => {
|
Ok((url, html)) => {
|
||||||
|
match extract_and_download_imgs(
|
||||||
|
&url,
|
||||||
|
html,
|
||||||
|
bar,
|
||||||
|
partial_downloads,
|
||||||
|
&mut articles,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(_) => bar.inc(1),
|
||||||
|
|
||||||
|
// All errors are pushed into here since they're readability issues.
|
||||||
|
Err(e) => errors.push(e),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Outside the stream, make a new one to retry with paperteer
|
||||||
|
}
|
||||||
|
Err(e) => errors.push(e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if !retry_with_paperteer.is_empty() {
|
||||||
|
// fetch_html_from_paperteer(
|
||||||
|
// retry_with_paperteer,
|
||||||
|
// app_config,
|
||||||
|
// bar,
|
||||||
|
// partial_downloads,
|
||||||
|
// errors,
|
||||||
|
// &mut articles,
|
||||||
|
// )
|
||||||
|
// .await
|
||||||
|
// .unwrap();
|
||||||
|
// }
|
||||||
|
articles
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn extract_and_download_imgs<'a>(
|
||||||
|
url: &str,
|
||||||
|
html: String,
|
||||||
|
bar: &ProgressBar,
|
||||||
|
partial_downloads: &mut Vec<PartialDownload>,
|
||||||
|
articles: &mut Vec<Article>,
|
||||||
|
) -> Result<(), PaperoniError> {
|
||||||
debug!("Extracting {}", &url);
|
debug!("Extracting {}", &url);
|
||||||
let mut extractor = Article::from_html(&html, &url);
|
let mut extractor = Article::from_html(&html, &url);
|
||||||
bar.set_message("Extracting...");
|
bar.set_message("Extracting...");
|
||||||
|
@ -32,16 +78,14 @@ pub fn download(
|
||||||
Ok(_) => {
|
Ok(_) => {
|
||||||
extractor.extract_img_urls();
|
extractor.extract_img_urls();
|
||||||
if let Err(img_errors) =
|
if let Err(img_errors) =
|
||||||
download_images(&mut extractor, &Url::parse(&url).unwrap(), &bar)
|
download_images(&mut extractor, &Url::parse(&url).unwrap(), &bar).await
|
||||||
.await
|
|
||||||
{
|
{
|
||||||
partial_downloads
|
partial_downloads.push(PartialDownload::new(&url, extractor.metadata().title()));
|
||||||
.push(PartialDownload::new(&url, extractor.metadata().title()));
|
|
||||||
warn!(
|
warn!(
|
||||||
"{} image{} failed to download for {}",
|
"{} image{} failed to download for {}",
|
||||||
img_errors.len(),
|
img_errors.len(),
|
||||||
if img_errors.len() > 1 { "s" } else { "" },
|
if img_errors.len() > 1 { "s" } else { "" },
|
||||||
url
|
&url
|
||||||
);
|
);
|
||||||
for img_error in img_errors {
|
for img_error in img_errors {
|
||||||
warn!(
|
warn!(
|
||||||
|
@ -52,19 +96,87 @@ pub fn download(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
articles.push(extractor);
|
articles.push(extractor);
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
Err(mut e) => {
|
Err(mut e) => {
|
||||||
e.set_article_source(&url);
|
e.set_article_source(&url);
|
||||||
errors.push(e);
|
Err(e)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
struct PaperteerBody {
|
||||||
|
urls: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PaperteerBody {
|
||||||
|
fn new(urls: Vec<String>) -> Self {
|
||||||
|
PaperteerBody { urls }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
struct PaperteerItem {
|
||||||
|
url: String,
|
||||||
|
response: String,
|
||||||
|
html: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
|
struct PaperteerResponse {
|
||||||
|
data: Vec<PaperteerItem>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Change signature to simply take a vec of urls and return a vec of urls with either html or an error
|
||||||
|
// This also means that extracting and downloading imgs should be handled externally
|
||||||
|
async fn _fetch_html_from_paperteer(
|
||||||
|
urls: Vec<String>,
|
||||||
|
_app_config: &AppConfig,
|
||||||
|
bar: &ProgressBar,
|
||||||
|
partial_downloads: &mut Vec<PartialDownload>,
|
||||||
|
errors: &mut Vec<PaperoniError>,
|
||||||
|
articles: &mut Vec<Article>,
|
||||||
|
) -> Result<(), ()> {
|
||||||
|
// Get the paperteer url
|
||||||
|
let render_endpoint = "/api/render";
|
||||||
|
let paperteer_url = url::Url::parse("http://localhost:3000")
|
||||||
|
.unwrap()
|
||||||
|
.join(render_endpoint)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Build request body with urls
|
||||||
|
let urls_str = urls.into_iter().map(|url| url.to_string()).collect();
|
||||||
|
let body = PaperteerBody::new(urls_str);
|
||||||
|
|
||||||
|
// Send to the paperteer url
|
||||||
|
let mut res = surf::post(paperteer_url)
|
||||||
|
.body(surf::Body::from_json(&body).unwrap())
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Receive the json response
|
||||||
|
// TODO: Check for body response
|
||||||
|
let PaperteerResponse { data } = res.body_json().await.unwrap();
|
||||||
|
|
||||||
|
// For each url, extract the article and images
|
||||||
|
for item in data {
|
||||||
|
let PaperteerItem {
|
||||||
|
html,
|
||||||
|
url,
|
||||||
|
response,
|
||||||
|
} = item;
|
||||||
|
if response == "ok" {
|
||||||
|
// Run the extract and download fn
|
||||||
|
match extract_and_download_imgs(&url, html, bar, partial_downloads, articles).await {
|
||||||
|
Ok(_) => bar.inc(1),
|
||||||
Err(e) => errors.push(e),
|
Err(e) => errors.push(e),
|
||||||
}
|
}
|
||||||
bar.inc(1);
|
} else {
|
||||||
|
errors.push(crate::errors::ErrorKind::HTTPError("Paperteer failed".into()).into());
|
||||||
}
|
}
|
||||||
articles
|
}
|
||||||
})
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
|
pub async fn fetch_html(url: &str) -> Result<HTMLResource, PaperoniError> {
|
||||||
|
|
|
@ -2,7 +2,7 @@ use std::fs;
|
||||||
|
|
||||||
use chrono::{DateTime, Local};
|
use chrono::{DateTime, Local};
|
||||||
use colored::*;
|
use colored::*;
|
||||||
use comfy_table::presets::UTF8_HORIZONTAL_ONLY;
|
use comfy_table::presets::UTF8_HORIZONTAL_BORDERS_ONLY;
|
||||||
use comfy_table::{Cell, CellAlignment, ContentArrangement, Table};
|
use comfy_table::{Cell, CellAlignment, ContentArrangement, Table};
|
||||||
use flexi_logger::{FileSpec, LevelFilter};
|
use flexi_logger::{FileSpec, LevelFilter};
|
||||||
use log::error;
|
use log::error;
|
||||||
|
@ -38,7 +38,7 @@ pub fn display_summary(
|
||||||
println!("\n{}", "Partially failed downloads".yellow().bold());
|
println!("\n{}", "Partially failed downloads".yellow().bold());
|
||||||
let mut table_partial = Table::new();
|
let mut table_partial = Table::new();
|
||||||
table_partial
|
table_partial
|
||||||
.load_preset(UTF8_HORIZONTAL_ONLY)
|
.load_preset(UTF8_HORIZONTAL_BORDERS_ONLY)
|
||||||
.set_header(vec![
|
.set_header(vec![
|
||||||
Cell::new("Link").set_alignment(CellAlignment::Center),
|
Cell::new("Link").set_alignment(CellAlignment::Center),
|
||||||
Cell::new("Title").set_alignment(CellAlignment::Center),
|
Cell::new("Title").set_alignment(CellAlignment::Center),
|
||||||
|
@ -55,7 +55,7 @@ pub fn display_summary(
|
||||||
println!("\n{}", "Failed article downloads".bright_red().bold());
|
println!("\n{}", "Failed article downloads".bright_red().bold());
|
||||||
let mut table_failed = Table::new();
|
let mut table_failed = Table::new();
|
||||||
table_failed
|
table_failed
|
||||||
.load_preset(UTF8_HORIZONTAL_ONLY)
|
.load_preset(UTF8_HORIZONTAL_BORDERS_ONLY)
|
||||||
.set_header(vec![
|
.set_header(vec![
|
||||||
Cell::new("Link").set_alignment(CellAlignment::Center),
|
Cell::new("Link").set_alignment(CellAlignment::Center),
|
||||||
Cell::new("Reason").set_alignment(CellAlignment::Center),
|
Cell::new("Reason").set_alignment(CellAlignment::Center),
|
||||||
|
|
17
src/main.rs
17
src/main.rs
|
@ -2,10 +2,9 @@
|
||||||
extern crate lazy_static;
|
extern crate lazy_static;
|
||||||
|
|
||||||
use std::process::exit;
|
use std::process::exit;
|
||||||
use std::time::Duration;
|
|
||||||
|
|
||||||
use colored::Colorize;
|
use colored::Colorize;
|
||||||
use comfy_table::presets::{UTF8_FULL, UTF8_HORIZONTAL_ONLY};
|
use comfy_table::presets::{UTF8_FULL, UTF8_HORIZONTAL_BORDERS_ONLY};
|
||||||
use comfy_table::{ContentArrangement, Table};
|
use comfy_table::{ContentArrangement, Table};
|
||||||
use http::download;
|
use http::download;
|
||||||
use indicatif::{ProgressBar, ProgressStyle};
|
use indicatif::{ProgressBar, ProgressStyle};
|
||||||
|
@ -26,8 +25,6 @@ use epub::generate_epubs;
|
||||||
use html::generate_html_exports;
|
use html::generate_html_exports;
|
||||||
use logs::display_summary;
|
use logs::display_summary;
|
||||||
|
|
||||||
use crate::errors::PaperoniError;
|
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let app_config = match cli::AppConfig::init_with_cli() {
|
let app_config = match cli::AppConfig::init_with_cli() {
|
||||||
Ok(app_config) => app_config,
|
Ok(app_config) => app_config,
|
||||||
|
@ -61,15 +58,9 @@ fn run(app_config: AppConfig) {
|
||||||
let enabled_bar = ProgressBar::new(app_config.urls.len() as u64);
|
let enabled_bar = ProgressBar::new(app_config.urls.len() as u64);
|
||||||
let style = ProgressStyle::default_bar().template(
|
let style = ProgressStyle::default_bar().template(
|
||||||
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} link {pos}/{len:7} {msg:.yellow/white}",
|
"{spinner:.cyan} [{elapsed_precise}] {bar:40.white} {:>8} link {pos}/{len:7} {msg:.yellow/white}",
|
||||||
).map_err(|e| {
|
);
|
||||||
let mut paperoni_err: PaperoniError = e.into();
|
|
||||||
paperoni_err.set_article_source("progress bar");
|
|
||||||
vec![paperoni_err]
|
|
||||||
});
|
|
||||||
if let Ok(style) = style {
|
|
||||||
enabled_bar.set_style(style);
|
enabled_bar.set_style(style);
|
||||||
}
|
enabled_bar.enable_steady_tick(500);
|
||||||
enabled_bar.enable_steady_tick(Duration::from_millis(500));
|
|
||||||
enabled_bar
|
enabled_bar
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -79,7 +70,7 @@ fn run(app_config: AppConfig) {
|
||||||
let mut successful_articles_table = Table::new();
|
let mut successful_articles_table = Table::new();
|
||||||
successful_articles_table
|
successful_articles_table
|
||||||
.load_preset(UTF8_FULL)
|
.load_preset(UTF8_FULL)
|
||||||
.load_preset(UTF8_HORIZONTAL_ONLY)
|
.load_preset(UTF8_HORIZONTAL_BORDERS_ONLY)
|
||||||
.set_content_arrangement(ContentArrangement::Dynamic);
|
.set_content_arrangement(ContentArrangement::Dynamic);
|
||||||
|
|
||||||
match app_config.export_type {
|
match app_config.export_type {
|
||||||
|
|
|
@ -17,7 +17,7 @@ const FLAG_STRIP_UNLIKELYS: u32 = 0x1;
|
||||||
const FLAG_WEIGHT_CLASSES: u32 = 0x2;
|
const FLAG_WEIGHT_CLASSES: u32 = 0x2;
|
||||||
const FLAG_CLEAN_CONDITIONALLY: u32 = 0x4;
|
const FLAG_CLEAN_CONDITIONALLY: u32 = 0x4;
|
||||||
const READABILITY_SCORE: &'static str = "readability-score";
|
const READABILITY_SCORE: &'static str = "readability-score";
|
||||||
const HTML_NS: &'static str = "http://www.w3.org/1999/xhtml";
|
pub const HTML_NS: &'static str = "http://www.w3.org/1999/xhtml";
|
||||||
// TODO: Change to HashSet
|
// TODO: Change to HashSet
|
||||||
const PHRASING_ELEMS: [&str; 39] = [
|
const PHRASING_ELEMS: [&str; 39] = [
|
||||||
"abbr", "audio", "b", "bdo", "br", "button", "cite", "code", "data", "datalist", "dfn", "em",
|
"abbr", "audio", "b", "bdo", "br", "button", "cite", "code", "data", "datalist", "dfn", "em",
|
||||||
|
|
Loading…
Reference in a new issue