2020-05-01 18:42:41 +01:00
|
|
|
use std::fs::File;
|
|
|
|
|
2020-04-30 09:05:53 +01:00
|
|
|
use async_std::task;
|
2020-05-02 17:25:31 +01:00
|
|
|
use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
|
2020-05-02 16:33:45 +01:00
|
|
|
use url::Url;
|
2020-04-30 09:05:53 +01:00
|
|
|
|
2020-05-01 14:17:59 +01:00
|
|
|
mod extractor;
|
|
|
|
|
|
|
|
use extractor::Extractor;
|
2020-04-30 09:05:53 +01:00
|
|
|
fn main() {
|
|
|
|
task::block_on(async {
|
|
|
|
let urls = vec![
|
|
|
|
"https://saveandrun.com/posts/2020-01-24-generating-mazes-with-haskell-part-1.html",
|
|
|
|
"https://saveandrun.com/posts/2020-04-05-querying-pacman-with-datalog.html",
|
|
|
|
"https://blog.hipstermojo.xyz/posts/redis-orm-preface/",
|
|
|
|
"https://vuejsdevelopers.com/2020/03/31/vue-js-form-composition-api/?utm_campaign=xl5&utm_medium=article&utm_source=vuejsnews#adding-validators",
|
2020-05-01 14:17:59 +01:00
|
|
|
"https://medium.com/typeforms-engineering-blog/the-beginners-guide-to-oauth-dancing-4b8f3666de10",
|
|
|
|
"https://dev.to/steelwolf180/full-stack-development-in-django-3768"
|
2020-04-30 09:05:53 +01:00
|
|
|
];
|
2020-05-02 17:06:03 +01:00
|
|
|
let html = fetch_url(urls[5]).await;
|
2020-05-02 12:51:53 +01:00
|
|
|
let mut extractor = Extractor::from_html(&html);
|
2020-05-01 18:42:41 +01:00
|
|
|
println!("Extracting");
|
2020-05-02 12:51:53 +01:00
|
|
|
extractor.extract_content();
|
2020-05-02 16:33:45 +01:00
|
|
|
extractor
|
2020-05-02 17:06:03 +01:00
|
|
|
.download_images(&Url::parse(urls[5]).unwrap())
|
2020-05-02 16:33:45 +01:00
|
|
|
.await
|
|
|
|
.expect("Unable to download images");
|
2020-05-02 17:25:31 +01:00
|
|
|
let mut out_file = File::create("out.epub").unwrap();
|
|
|
|
let mut html_buf = Vec::new();
|
2020-05-02 17:06:03 +01:00
|
|
|
extractor
|
|
|
|
.content
|
|
|
|
.unwrap()
|
|
|
|
.as_node()
|
2020-05-02 17:25:31 +01:00
|
|
|
.serialize(&mut html_buf)
|
2020-05-02 17:06:03 +01:00
|
|
|
.expect("Unable to serialize");
|
2020-05-02 17:25:31 +01:00
|
|
|
let html_buf = std::str::from_utf8(&html_buf).unwrap();
|
|
|
|
EpubBuilder::new(ZipLibrary::new().unwrap())
|
|
|
|
.unwrap()
|
|
|
|
.add_content(EpubContent::new("code.xhtml", html_buf.as_bytes()))
|
|
|
|
.unwrap()
|
|
|
|
.generate(&mut out_file)
|
|
|
|
.unwrap();
|
|
|
|
})
|
2020-04-30 09:05:53 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
async fn fetch_url(url: &str) -> String {
|
|
|
|
let client = surf::Client::new();
|
|
|
|
println!("Fetching...");
|
2020-05-02 16:33:45 +01:00
|
|
|
// TODO: Add middleware for following redirects
|
2020-04-30 09:05:53 +01:00
|
|
|
client
|
|
|
|
.get(url)
|
|
|
|
.recv_string()
|
|
|
|
.await
|
|
|
|
.expect("Unable to fetch URL")
|
|
|
|
}
|