Add basic redirect handling using surf's Redirect middleware, and exit the program early if the response status is not HTTP 200

This commit is contained in:
Kenneth Gitere 2020-11-24 17:44:31 +03:00
parent 5f99bddc10
commit 725c73c83f
2 changed files with 18 additions and 12 deletions

View file

@@ -74,7 +74,11 @@ impl Extractor {
let abs_url = get_absolute_url(&img_url, article_origin); let abs_url = get_absolute_url(&img_url, article_origin);
async_download_tasks.push(task::spawn(async move { async_download_tasks.push(task::spawn(async move {
let mut img_response = surf::get(&abs_url).await.expect("Unable to retrieve file"); let mut img_response = surf::Client::new()
.with(surf::middleware::Redirect::default())
.get(&abs_url)
.await
.expect("Unable to retrieve file");
let img_content: Vec<u8> = img_response.body_bytes().await.unwrap(); let img_content: Vec<u8> = img_response.body_bytes().await.unwrap();
let img_mime = img_response let img_mime = img_response
.content_type() .content_type()

View file

@@ -23,24 +23,26 @@ fn main() {
/// A fetched page as a `(url, body)` pair: `fetch_url` puts the requested URL
/// first and the response body text second.
type HTMLResource = (String, String); type HTMLResource = (String, String);
/// Fetches `url` with a `surf::Client` and yields the `(url, body)` pair as an
/// `HTMLResource`. Prints "Fetching..." before issuing the request.
///
/// The rows below are side-by-side diff rows: the left half of each row is the
/// pre-commit code, the right half is the post-commit code.
///
/// Pre-commit (left): returns `HTMLResource` directly. It calls
/// `client.get(url).recv_string()` with no middleware attached and panics with
/// "Unable to fetch URL" if that call fails.
///
/// Post-commit (right): returns `Result<HTMLResource, Box<dyn std::error::Error>>`.
/// It attaches `surf::middleware::Redirect::default()` to the client, sends the
/// GET request, and then:
/// - returns `Ok((url, body))` when `res.status() == 200`;
/// - returns `Err("Request failed to return HTTP 200")` for any other status;
/// - propagates a `body_string()` failure to the caller through `?`.
///
/// # Panics
/// The post-commit code still panics through `expect` (message
/// "Unable to fetch {url}") when `send()` itself returns an error.
//
// NOTE(review): `.expect(&format!(..))` formats the message on every call, even
// when the request succeeds, and it aborts the task instead of using the `Err`
// path the new signature provides. Mapping the send error into the returned
// `Result` (e.g. `.map_err(..)?`) would make failure handling uniform — confirm
// against the surf error type before changing.
async fn fetch_url(url: &str) -> HTMLResource { async fn fetch_url(url: &str) -> Result<HTMLResource, Box<dyn std::error::Error>> {
let client = surf::Client::new(); let client = surf::Client::new();
println!("Fetching..."); println!("Fetching...");
// TODO: Add middleware for following redirects let mut res = client
( .with(surf::middleware::Redirect::default())
url.to_string(), .get(url)
client .send()
.get(url) .await
.recv_string() .expect(&format!("Unable to fetch {}", url));
.await if res.status() == 200 {
.expect("Unable to fetch URL"), Ok((url.to_string(), res.body_string().await?))
) } else {
Err("Request failed to return HTTP 200".into())
}
} }
fn download(urls: Vec<String>) { fn download(urls: Vec<String>) {
let mut async_url_tasks = Vec::with_capacity(urls.len()); let mut async_url_tasks = Vec::with_capacity(urls.len());
for url in urls { for url in urls {
async_url_tasks.push(task::spawn(async move { fetch_url(&url).await })); async_url_tasks.push(task::spawn(async move { fetch_url(&url).await.unwrap() }));
} }
task::block_on(async { task::block_on(async {
for url_task in async_url_tasks { for url_task in async_url_tasks {