feat: Detect and ignore non-text files
All checks were successful
Test / test (push) Successful in 9s

Closes kemitix/forgejo-todo-checker#4
This commit is contained in:
Paul Campbell 2024-09-21 11:35:56 +01:00
parent 869af60a51
commit 5a1fedd94b
6 changed files with 115 additions and 40 deletions

View file

@ -5,16 +5,20 @@ edition = "2021"
[dependencies] [dependencies]
anyhow = "1.0" anyhow = "1.0"
regex = "1.10"
ureq = "2.10"
kxio = "1.2"
ignore = "0.4"
bon = "2.3" bon = "2.3"
tokio = { version = "1.37", features = [ "full" ] } ignore = "0.4"
file-format = { version = "0.25", features = ["reader-txt"] }
kxio = "1.2"
regex = "1.10"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
tokio = { version = "1.37", features = ["full"] }
ureq = "2.10"
[dev-dependencies] [dev-dependencies]
assert2 = "0.3" assert2 = "0.3"
pretty_assertions = "1.4" pretty_assertions = "1.4"
rstest = "0.22" rstest = "0.22"
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tarpaulin_include)'] }

View file

@ -23,7 +23,7 @@ pub async fn fetch_open_issues(config: &Config) -> Result<HashSet<Issue>> {
let issues: HashSet<Issue> = config let issues: HashSet<Issue> = config
.net() .net()
.get::<Vec<Issue>>(request) .get::<Vec<Issue>>(request)
.await? .await? // tarpaulin uncovered okay
.response_body() .response_body()
.unwrap_or_default() .unwrap_or_default()
.into_iter() .into_iter()

View file

@ -2,7 +2,7 @@
use anyhow::{bail, Result}; use anyhow::{bail, Result};
use init::init_config; use init::init_config;
use issues::fetch_open_issues; use issues::fetch_open_issues;
use scanner::find_markers; use scanner::{find_markers, DefaultFileScanner};
mod init; mod init;
mod issues; mod issues;
@ -14,6 +14,7 @@ mod scanner;
mod tests; mod tests;
#[tokio::main] #[tokio::main]
#[cfg(not(tarpaulin_include))]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> { async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
Ok(run(kxio::network::Network::new_real()).await?) Ok(run(kxio::network::Network::new_real()).await?)
} }
@ -23,7 +24,7 @@ async fn run(net: kxio::network::Network) -> Result<()> {
let config = init_config(net)?; let config = init_config(net)?;
let issues = fetch_open_issues(&config).await?; let issues = fetch_open_issues(&config).await?;
let markers = find_markers(&config, issues)?; let markers = find_markers(&config, issues, &DefaultFileScanner)?;
let mut errors = false; let mut errors = false;
for marker in (*markers).iter() { for marker in (*markers).iter() {

View file

@ -19,6 +19,7 @@ impl Marker {
pub fn into_closed(self) -> Self { pub fn into_closed(self) -> Self {
match self { match self {
Self::Valid(line, issue) => Self::Closed(line, issue), Self::Valid(line, issue) => Self::Closed(line, issue),
#[cfg(not(tarpaulin_include))] // only ever called when is a Valid
_ => self, _ => self,
} }
} }

View file

@ -6,30 +6,55 @@ use crate::{
model::{Config, Line, Marker, Markers}, model::{Config, Line, Marker, Markers},
}; };
use anyhow::Result; use anyhow::Result;
use file_format::FileFormat;
use ignore::Walk; use ignore::Walk;
pub fn find_markers(config: &Config, issues: HashSet<Issue>) -> Result<Markers, anyhow::Error> { //<'a> = dyn Fn(&'a Path, &'a Config, &'a mut Markers, &'a HashSet<Issue>, Output=Result<()>);
/// Abstraction over the work performed on a single file during a scan.
///
/// `find_markers` takes an implementation of this trait as a parameter, so the
/// per-file behaviour can be substituted by the caller.
pub trait FileScanner {
/// Processes the file at `path`.
///
/// Implementations are handed the active `config`, a mutable `markers`
/// collection they may add to, and the set of `issues` supplied to the scan.
/// Returns `Err` if processing the file fails.
fn scan_file(
&self,
path: &Path,
config: &Config,
markers: &mut Markers,
issues: &HashSet<Issue>,
) -> Result<()>;
}
pub fn find_markers(
config: &Config,
issues: HashSet<Issue>,
file_scanner: &impl FileScanner,
) -> Result<Markers, anyhow::Error> {
let mut markers = Markers::default(); let mut markers = Markers::default();
for file in Walk::new(config.fs().base()).flatten() { for file in Walk::new(config.fs().base()).flatten() {
let path = file.path(); let path = file.path();
if config.fs().path_is_file(path)? { if is_text_file(config, path)? {
// TODO: (#4) ignore non-text files file_scanner.scan_file(path, config, &mut markers, &issues)?
scan_file(path, config, &mut markers, &issues)?;
} }
} }
Ok(markers) Ok(markers)
} }
/// Decides whether `path` should be scanned.
///
/// Yields `Ok(true)` only when `path` is a file and the media type detected by
/// `FileFormat` begins with `text/`. Errors from the file check or from format
/// detection are propagated to the caller.
fn is_text_file(config: &Config, path: &Path) -> Result<bool> {
    // Guard first: format detection is only attempted for actual files.
    if !config.fs().path_is_file(path)? {
        return Ok(false);
    }
    let detected = FileFormat::from_file(path)?;
    Ok(detected.media_type().starts_with("text/"))
}
pub struct DefaultFileScanner;
impl FileScanner for DefaultFileScanner {
fn scan_file( fn scan_file(
&self,
file: &Path, file: &Path,
config: &Config, config: &Config,
found_markers: &mut Markers, markers: &mut Markers,
issues: &HashSet<Issue>, issues: &HashSet<Issue>,
) -> Result<()> { ) -> Result<()> {
let relative_path = file.strip_prefix(config.fs().base())?.to_path_buf(); let relative_path = file.strip_prefix(config.fs().base())?.to_path_buf();
config config
.fs() .fs()
.file_read_to_string(file)? .file_read_to_string(file)? // tarpaulin uncovered okay
.lines() .lines()
.enumerate() .enumerate()
.map(|(n, line)| { .map(|(n, line)| {
@ -43,10 +68,11 @@ fn scan_file(
.filter_map(|line| line.into_marker().ok()) .filter_map(|line| line.into_marker().ok())
.filter(|marker| !matches!(marker, Marker::Unmarked)) .filter(|marker| !matches!(marker, Marker::Unmarked))
.map(|marker| has_open_issue(marker, issues)) .map(|marker| has_open_issue(marker, issues))
.for_each(|marker| found_markers.add_marker(marker)); .for_each(|marker| markers.add_marker(marker));
Ok(()) Ok(())
} }
}
fn has_open_issue(marker: Marker, issues: &HashSet<Issue>) -> Marker { fn has_open_issue(marker: Marker, issues: &HashSet<Issue>) -> Marker {
if let Marker::Valid(_, ref issue) = marker { if let Marker::Valid(_, ref issue) = marker {

View file

@ -1,7 +1,9 @@
use crate::scanner::FileScanner;
// //
use super::*; use super::*;
use std::collections::HashSet; use std::{cell::RefCell, collections::HashSet, fs::File, io::Write, path::PathBuf};
use issues::Issue; use issues::Issue;
use model::Config; use model::Config;
@ -32,7 +34,7 @@ fn find_markers_in_dir() -> anyhow::Result<()> {
let issues = HashSet::from_iter(vec![Issue::new(23), Issue::new(43)]); let issues = HashSet::from_iter(vec![Issue::new(23), Issue::new(43)]);
//when //when
let markers = find_markers(&config, issues)?; let markers = find_markers(&config, issues, &DefaultFileScanner)?;
//then //then
assert_eq!( assert_eq!(
@ -53,3 +55,44 @@ fn find_markers_in_dir() -> anyhow::Result<()> {
Ok(()) Ok(())
} }
/// A directory holding one binary and one text file must hand only the text
/// file to the scanner.
#[test]
fn skips_binary_files() -> Result<()> {
    // Leading NUL bytes so the content is not detected as text.
    const BINARY_CONTENTS: &[u8] = &[0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9];

    //given
    let temp_fs = kxio::fs::temp()?;
    File::create(temp_fs.base().join("binary_file.bin"))?.write_all(BINARY_CONTENTS)?;
    let text_path = temp_fs.base().join("text_file.txt");
    temp_fs.file_write(&text_path, "text contents")?;

    let config = a_config(kxio::network::Network::new_mock(), temp_fs)?;
    let recorder = TestFileScanner::default();

    //when
    find_markers(&config, HashSet::new(), &recorder)?;

    //then
    let visited = recorder.scanned.into_inner();
    assert_eq!(visited, vec![text_path]);

    Ok(())
}
/// Test double for `FileScanner` that remembers every path it is asked to scan.
#[derive(Default)]
struct TestFileScanner {
// Interior mutability: `scan_file` only receives `&self`, yet must append here.
scanned: RefCell<Vec<PathBuf>>,
}
impl FileScanner for TestFileScanner {
fn scan_file(
&self,
path: &std::path::Path,
_config: &Config,
_markers: &mut model::Markers,
_issues: &HashSet<Issue>,
) -> Result<()> {
self.scanned.borrow_mut().push(path.to_path_buf());
Ok(())
}
}