Change CLI option to allow for multiple arguments
Add basic looping in async runtime
This commit is contained in:
parent
db11e78d8c
commit
1b4c4ee658
2 changed files with 57 additions and 46 deletions
|
@@ -9,5 +9,5 @@ use structopt::StructOpt;
|
|||
pub struct Opts {
|
||||
// #[structopt(conflicts_with("links"))]
|
||||
/// Url of a web article
|
||||
pub url: Option<String>,
|
||||
pub urls: Vec<String>,
|
||||
}
|
||||
|
|
101
src/main.rs
101
src/main.rs
|
@@ -15,63 +15,74 @@ mod moz_readability;
|
|||
use extractor::Extractor;
|
||||
fn main() {
|
||||
let opt = cli::Opts::from_args();
|
||||
if let Some(url) = opt.url {
|
||||
if !opt.urls.is_empty() {
|
||||
println!("Downloading single article");
|
||||
download(url)
|
||||
download(opt.urls);
|
||||
}
|
||||
}
|
||||
|
||||
async fn fetch_url(url: &str) -> String {
|
||||
type HTMLResource = (String, String);
|
||||
|
||||
async fn fetch_url(url: &str) -> HTMLResource {
|
||||
let client = surf::Client::new();
|
||||
println!("Fetching...");
|
||||
// TODO: Add middleware for following redirects
|
||||
client
|
||||
.get(url)
|
||||
.recv_string()
|
||||
.await
|
||||
.expect("Unable to fetch URL")
|
||||
(
|
||||
url.to_string(),
|
||||
client
|
||||
.get(url)
|
||||
.recv_string()
|
||||
.await
|
||||
.expect("Unable to fetch URL"),
|
||||
)
|
||||
}
|
||||
|
||||
fn download(url: String) {
|
||||
fn download(urls: Vec<String>) {
|
||||
let mut async_url_tasks = Vec::with_capacity(urls.len());
|
||||
for url in urls {
|
||||
async_url_tasks.push(task::spawn(async move { fetch_url(&url).await }));
|
||||
}
|
||||
task::block_on(async {
|
||||
let html = fetch_url(&url).await;
|
||||
println!("Extracting");
|
||||
let mut extractor = Extractor::from_html(&html);
|
||||
extractor.extract_content(&url);
|
||||
if extractor.article().is_some() {
|
||||
create_dir("res/")
|
||||
.await
|
||||
.expect("Unable to create res/ output folder");
|
||||
extractor
|
||||
.download_images(&Url::parse(&url).unwrap())
|
||||
.await
|
||||
.expect("Unable to download images");
|
||||
let mut out_file =
|
||||
File::create(format!("{}.epub", extractor.metadata().title())).unwrap();
|
||||
let mut html_buf = Vec::new();
|
||||
extractor
|
||||
.article()
|
||||
.unwrap()
|
||||
.serialize(&mut html_buf)
|
||||
.expect("Unable to serialize");
|
||||
let html_buf = std::str::from_utf8(&html_buf).unwrap();
|
||||
let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
||||
if let Some(author) = extractor.metadata().byline() {
|
||||
epub.metadata("author", author).unwrap();
|
||||
}
|
||||
epub.metadata("title", extractor.metadata().title())
|
||||
.unwrap();
|
||||
epub.add_content(EpubContent::new("code.xhtml", html_buf.as_bytes()))
|
||||
.unwrap();
|
||||
for img in extractor.img_urls {
|
||||
let file_path = format!("{}", &img.0);
|
||||
for url_task in async_url_tasks {
|
||||
let (url, html) = url_task.await;
|
||||
println!("Extracting");
|
||||
let mut extractor = Extractor::from_html(&html);
|
||||
extractor.extract_content(&url);
|
||||
if extractor.article().is_some() {
|
||||
create_dir("res/")
|
||||
.await
|
||||
.expect("Unable to create res/ output folder");
|
||||
extractor
|
||||
.download_images(&Url::parse(&url).unwrap())
|
||||
.await
|
||||
.expect("Unable to download images");
|
||||
let mut out_file =
|
||||
File::create(format!("{}.epub", extractor.metadata().title())).unwrap();
|
||||
let mut html_buf = Vec::new();
|
||||
extractor
|
||||
.article()
|
||||
.unwrap()
|
||||
.serialize(&mut html_buf)
|
||||
.expect("Unable to serialize");
|
||||
let html_buf = std::str::from_utf8(&html_buf).unwrap();
|
||||
let mut epub = EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap();
|
||||
if let Some(author) = extractor.metadata().byline() {
|
||||
epub.metadata("author", author).unwrap();
|
||||
}
|
||||
epub.metadata("title", extractor.metadata().title())
|
||||
.unwrap();
|
||||
epub.add_content(EpubContent::new("code.xhtml", html_buf.as_bytes()))
|
||||
.unwrap();
|
||||
for img in extractor.img_urls {
|
||||
let file_path = format!("{}", &img.0);
|
||||
|
||||
let img_buf = File::open(file_path).expect("Can't read file");
|
||||
epub.add_resource(img.0, img_buf, img.1.unwrap()).unwrap();
|
||||
let img_buf = File::open(file_path).expect("Can't read file");
|
||||
epub.add_resource(img.0, img_buf, img.1.unwrap()).unwrap();
|
||||
}
|
||||
epub.generate(&mut out_file).unwrap();
|
||||
println!("Cleaning up");
|
||||
remove_dir_all("res/").await.unwrap();
|
||||
}
|
||||
epub.generate(&mut out_file).unwrap();
|
||||
println!("Cleaning up");
|
||||
remove_dir_all("res/").await.unwrap();
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
Reference in a new issue