From 5a1fedd94b9880391cabecb78e2d3c35ae1b141f Mon Sep 17 00:00:00 2001 From: Paul Campbell Date: Sat, 21 Sep 2024 11:35:56 +0100 Subject: [PATCH] feat: Detect and ignore non-text files Closes kemitix/forgejo-todo-checker#4 --- Cargo.toml | 16 +++++--- src/issues/fetch.rs | 2 +- src/main.rs | 5 ++- src/model/markers/mod.rs | 1 + src/scanner.rs | 84 ++++++++++++++++++++++++++-------------- src/tests/scanner.rs | 47 +++++++++++++++++++++- 6 files changed, 115 insertions(+), 40 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 30d62dc..4be34f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,16 +5,20 @@ edition = "2021" [dependencies] anyhow = "1.0" -regex = "1.10" -ureq = "2.10" -kxio = "1.2" -ignore = "0.4" bon = "2.3" -tokio = { version = "1.37", features = [ "full" ] } -serde = { version = "1.0", features = [ "derive" ] } +ignore = "0.4" +file-format = { version = "0.25", features = ["reader-txt"] } +kxio = "1.2" +regex = "1.10" +serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +tokio = { version = "1.37", features = ["full"] } +ureq = "2.10" [dev-dependencies] assert2 = "0.3" pretty_assertions = "1.4" rstest = "0.22" + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tarpaulin_include)'] } diff --git a/src/issues/fetch.rs b/src/issues/fetch.rs index 63bcba6..b10fd23 100644 --- a/src/issues/fetch.rs +++ b/src/issues/fetch.rs @@ -23,7 +23,7 @@ pub async fn fetch_open_issues(config: &Config) -> Result> { let issues: HashSet = config .net() .get::>(request) - .await? + .await? // tarpaulin uncovered okay .response_body() .unwrap_or_default() .into_iter() diff --git a/src/main.rs b/src/main.rs index 088743a..bd0757c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,7 @@ use anyhow::{bail, Result}; use init::init_config; use issues::fetch_open_issues; -use scanner::find_markers; +use scanner::{find_markers, DefaultFileScanner}; mod init; mod issues; @@ -14,6 +14,7 @@ mod scanner; mod tests; #[tokio::main] +#[cfg(not(tarpaulin_include))] async fn main() -> std::result::Result<(), Box> { Ok(run(kxio::network::Network::new_real()).await?) } @@ -23,7 +24,7 @@ async fn run(net: kxio::network::Network) -> Result<()> { let config = init_config(net)?; let issues = fetch_open_issues(&config).await?; - let markers = find_markers(&config, issues)?; + let markers = find_markers(&config, issues, &DefaultFileScanner)?; let mut errors = false; for marker in (*markers).iter() { diff --git a/src/model/markers/mod.rs b/src/model/markers/mod.rs index e392629..afb44ac 100644 --- a/src/model/markers/mod.rs +++ b/src/model/markers/mod.rs @@ -19,6 +19,7 @@ impl Marker { pub fn into_closed(self) -> Self { match self { Self::Valid(line, issue) => Self::Closed(line, issue), + #[cfg(not(tarpaulin_include))] // only ever called when is a Valid _ => self, } } diff --git a/src/scanner.rs b/src/scanner.rs index d16d436..36ec1d7 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -6,46 +6,72 @@ use crate::{ model::{Config, Line, Marker, Markers}, }; use anyhow::Result; +use file_format::FileFormat; use ignore::Walk; -pub fn find_markers(config: &Config, issues: HashSet) -> Result { +//<'a> = dyn Fn(&'a Path, &'a Config, &'a mut Markers, &'a HashSet, Output=Result<()>); +pub trait FileScanner { + fn scan_file( + &self, + path: &Path, + config: &Config, + markers: &mut Markers, + issues: &HashSet, + ) -> Result<()>; +} + +pub fn find_markers( + config: &Config, + issues: HashSet, + file_scanner: &impl FileScanner, +) -> Result { let mut markers = Markers::default(); for file in Walk::new(config.fs().base()).flatten() { let path = file.path(); - if config.fs().path_is_file(path)? { - // TODO: (#4) ignore non-text files - scan_file(path, config, &mut markers, &issues)?; + if is_text_file(config, path)? { + file_scanner.scan_file(path, config, &mut markers, &issues)? } } Ok(markers) } -fn scan_file( - file: &Path, - config: &Config, - found_markers: &mut Markers, - issues: &HashSet, -) -> Result<()> { - let relative_path = file.strip_prefix(config.fs().base())?.to_path_buf(); - config - .fs() - .file_read_to_string(file)? - .lines() - .enumerate() - .map(|(n, line)| { - Line::builder() - .file(file.to_path_buf()) - .relative_path(relative_path.clone()) - .num(n + 1) // line numbers are not 0-based, but enumerate is - .value(line.to_owned()) - .build() - }) - .filter_map(|line| line.into_marker().ok()) - .filter(|marker| !matches!(marker, Marker::Unmarked)) - .map(|marker| has_open_issue(marker, issues)) - .for_each(|marker| found_markers.add_marker(marker)); +fn is_text_file(config: &Config, path: &Path) -> Result { + Ok(config.fs().path_is_file(path)? + && FileFormat::from_file(path)? + .media_type() + .starts_with("text/")) +} - Ok(()) +pub struct DefaultFileScanner; +impl FileScanner for DefaultFileScanner { + fn scan_file( + &self, + file: &Path, + config: &Config, + markers: &mut Markers, + issues: &HashSet, + ) -> Result<()> { + let relative_path = file.strip_prefix(config.fs().base())?.to_path_buf(); + config + .fs() + .file_read_to_string(file)? // tarpaulin uncovered okay + .lines() + .enumerate() + .map(|(n, line)| { + Line::builder() + .file(file.to_path_buf()) + .relative_path(relative_path.clone()) + .num(n + 1) // line numbers are not 0-based, but enumerate is + .value(line.to_owned()) + .build() + }) + .filter_map(|line| line.into_marker().ok()) + .filter(|marker| !matches!(marker, Marker::Unmarked)) + .map(|marker| has_open_issue(marker, issues)) + .for_each(|marker| markers.add_marker(marker)); + + Ok(()) + } } fn has_open_issue(marker: Marker, issues: &HashSet) -> Marker { diff --git a/src/tests/scanner.rs b/src/tests/scanner.rs index 2d5b2b7..8e6ea2a 100644 --- a/src/tests/scanner.rs +++ b/src/tests/scanner.rs @@ -1,7 +1,9 @@ +use crate::scanner::FileScanner; + // use super::*; -use std::collections::HashSet; +use std::{cell::RefCell, collections::HashSet, fs::File, io::Write, path::PathBuf}; use issues::Issue; use model::Config; @@ -32,7 +34,7 @@ fn find_markers_in_dir() -> anyhow::Result<()> { let issues = HashSet::from_iter(vec![Issue::new(23), Issue::new(43)]); //when - let markers = find_markers(&config, issues)?; + let markers = find_markers(&config, issues, &DefaultFileScanner)?; //then assert_eq!( @@ -53,3 +55,44 @@ fn find_markers_in_dir() -> anyhow::Result<()> { Ok(()) } + +#[test] +fn skips_binary_files() -> Result<()> { + //given + let fs = kxio::fs::temp()?; + let binary_path = fs.base().join("binary_file.bin"); + let mut binary_file = File::create(binary_path)?; + binary_file.write_all(&[0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])?; + let text_path = fs.base().join("text_file.txt"); + fs.file_write(&text_path, "text contents")?; + + let net = kxio::network::Network::new_mock(); + let config = a_config(net, fs)?; + let issues = HashSet::new(); + let file_scanner = TestFileScanner::default(); + + //when + find_markers(&config, issues, &file_scanner)?; + + //then + assert_eq!(file_scanner.scanned.take(), vec![text_path]); + + Ok(()) +} + +#[derive(Default)] +struct TestFileScanner { + scanned: RefCell>, +} +impl FileScanner for TestFileScanner { + fn scan_file( + &self, + path: &std::path::Path, + _config: &Config, + _markers: &mut model::Markers, + _issues: &HashSet, + ) -> Result<()> { + self.scanned.borrow_mut().push(path.to_path_buf()); + Ok(()) + } +}