feat: Detect and ignore non-text files
All checks were successful
Test / test (push) Successful in 9s

Closes kemitix/forgejo-todo-checker#4
This commit is contained in:
Paul Campbell 2024-09-21 11:35:56 +01:00
parent 869af60a51
commit 5a1fedd94b
6 changed files with 115 additions and 40 deletions

View file

@ -5,16 +5,20 @@ edition = "2021"
[dependencies]
anyhow = "1.0"
regex = "1.10"
ureq = "2.10"
kxio = "1.2"
ignore = "0.4"
bon = "2.3"
tokio = { version = "1.37", features = [ "full" ] }
serde = { version = "1.0", features = [ "derive" ] }
ignore = "0.4"
file-format = { version = "0.25", features = ["reader-txt"] }
kxio = "1.2"
regex = "1.10"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tokio = { version = "1.37", features = ["full"] }
ureq = "2.10"
[dev-dependencies]
assert2 = "0.3"
pretty_assertions = "1.4"
rstest = "0.22"
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tarpaulin_include)'] }

View file

@ -23,7 +23,7 @@ pub async fn fetch_open_issues(config: &Config) -> Result<HashSet<Issue>> {
let issues: HashSet<Issue> = config
.net()
.get::<Vec<Issue>>(request)
.await?
.await? // tarpaulin uncovered okay
.response_body()
.unwrap_or_default()
.into_iter()

View file

@ -2,7 +2,7 @@
use anyhow::{bail, Result};
use init::init_config;
use issues::fetch_open_issues;
use scanner::find_markers;
use scanner::{find_markers, DefaultFileScanner};
mod init;
mod issues;
@ -14,6 +14,7 @@ mod scanner;
mod tests;
/// Program entry point.
///
/// Builds a `kxio` network handle with `Network::new_real()` and hands it to
/// `run`. Any error `run` reports is propagated to the runtime as a boxed
/// `std::error::Error`.
#[tokio::main]
#[cfg(not(tarpaulin_include))]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
    let net = kxio::network::Network::new_real();
    run(net).await?;
    Ok(())
}
@ -23,7 +24,7 @@ async fn run(net: kxio::network::Network) -> Result<()> {
let config = init_config(net)?;
let issues = fetch_open_issues(&config).await?;
let markers = find_markers(&config, issues)?;
let markers = find_markers(&config, issues, &DefaultFileScanner)?;
let mut errors = false;
for marker in (*markers).iter() {

View file

@ -19,6 +19,7 @@ impl Marker {
/// Converts a `Valid` marker into a `Closed` marker, keeping its line and issue.
///
/// Any other variant is returned unchanged.
pub fn into_closed(self) -> Self {
match self {
Self::Valid(line, issue) => Self::Closed(line, issue),
#[cfg(not(tarpaulin_include))] // excluded from coverage: only ever called on a `Valid` marker
_ => self,
}
}

View file

@ -6,46 +6,72 @@ use crate::{
model::{Config, Line, Marker, Markers},
};
use anyhow::Result;
use file_format::FileFormat;
use ignore::Walk;
pub fn find_markers(config: &Config, issues: HashSet<Issue>) -> Result<Markers, anyhow::Error> {
//<'a> = dyn Fn(&'a Path, &'a Config, &'a mut Markers, &'a HashSet<Issue>, Output=Result<()>);
/// Abstraction over "scan one file for markers".
///
/// `find_markers` takes an implementation of this trait and calls
/// `scan_file` for each text file it visits, which lets callers substitute
/// their own scanner for the default one.
pub trait FileScanner {
/// Scans the file at `path` and adds whatever markers it finds to `markers`.
///
/// `config` and `issues` are passed through from `find_markers` for the
/// implementation to use as it needs. Returning an error aborts the
/// enclosing `find_markers` call, which propagates it with `?`.
fn scan_file(
&self,
path: &Path,
config: &Config,
markers: &mut Markers,
issues: &HashSet<Issue>,
) -> Result<()>;
}
/// Walks the tree rooted at `config.fs().base()` and collects markers from its text files.
///
/// Entries the walker yields as errors are dropped by `flatten()`. Each
/// remaining path that `is_text_file` accepts is handed to
/// `file_scanner.scan_file`, which records into the returned `Markers`.
///
/// # Errors
///
/// Propagates any error from the file-type check or from the scanner.
pub fn find_markers(
config: &Config,
issues: HashSet<Issue>,
file_scanner: &impl FileScanner,
) -> Result<Markers, anyhow::Error> {
let mut markers = Markers::default();
for file in Walk::new(config.fs().base()).flatten() {
let path = file.path();
if config.fs().path_is_file(path)? {
// TODO: (#4) ignore non-text files
scan_file(path, config, &mut markers, &issues)?;
if is_text_file(config, path)? {
file_scanner.scan_file(path, config, &mut markers, &issues)?
}
}
Ok(markers)
}
fn scan_file(
file: &Path,
config: &Config,
found_markers: &mut Markers,
issues: &HashSet<Issue>,
) -> Result<()> {
let relative_path = file.strip_prefix(config.fs().base())?.to_path_buf();
config
.fs()
.file_read_to_string(file)?
.lines()
.enumerate()
.map(|(n, line)| {
Line::builder()
.file(file.to_path_buf())
.relative_path(relative_path.clone())
.num(n + 1) // line numbers are not 0-based, but enumerate is
.value(line.to_owned())
.build()
})
.filter_map(|line| line.into_marker().ok())
.filter(|marker| !matches!(marker, Marker::Unmarked))
.map(|marker| has_open_issue(marker, issues))
.for_each(|marker| found_markers.add_marker(marker));
/// Reports whether `path` is a file whose detected media type starts with `text/`.
///
/// The format detection is only attempted once `config.fs()` has confirmed
/// that `path` is a file; anything else yields `Ok(false)` without being read.
///
/// # Errors
///
/// Propagates failures from the file check and from `FileFormat::from_file`.
fn is_text_file(config: &Config, path: &Path) -> Result<bool> {
    if !config.fs().path_is_file(path)? {
        return Ok(false);
    }

    let format = FileFormat::from_file(path)?;
    Ok(format.media_type().starts_with("text/"))
}
Ok(())
/// The `FileScanner` the application passes to `find_markers`: it reads a
/// file's contents and records the markers found on its lines.
pub struct DefaultFileScanner;

impl FileScanner for DefaultFileScanner {
    /// Reads `file` through `config.fs()` and adds its markers to `markers`.
    ///
    /// Every line is wrapped in a `Line` (with its 1-based number and its path
    /// relative to the filesystem base) and converted with `into_marker`.
    /// Lines whose conversion fails, and lines that convert to
    /// `Marker::Unmarked`, are skipped; every other marker is passed through
    /// `has_open_issue` before being recorded.
    ///
    /// # Errors
    ///
    /// Fails if `file` is not located under the filesystem base or if its
    /// contents cannot be read as a string.
    fn scan_file(
        &self,
        file: &Path,
        config: &Config,
        markers: &mut Markers,
        issues: &HashSet<Issue>,
    ) -> Result<()> {
        let relative_path = file.strip_prefix(config.fs().base())?.to_path_buf();
        let contents = config.fs().file_read_to_string(file)?; // tarpaulin uncovered okay

        for (index, text) in contents.lines().enumerate() {
            let line = Line::builder()
                .file(file.to_path_buf())
                .relative_path(relative_path.clone())
                .num(index + 1) // line numbers are not 0-based, but enumerate is
                .value(text.to_owned())
                .build();

            match line.into_marker() {
                // nothing to record for unparsable or unmarked lines
                Err(_) | Ok(Marker::Unmarked) => {}
                Ok(marker) => markers.add_marker(has_open_issue(marker, issues)),
            }
        }

        Ok(())
    }
}
fn has_open_issue(marker: Marker, issues: &HashSet<Issue>) -> Marker {

View file

@ -1,7 +1,9 @@
use crate::scanner::FileScanner;
//
use super::*;
use std::collections::HashSet;
use std::{cell::RefCell, collections::HashSet, fs::File, io::Write, path::PathBuf};
use issues::Issue;
use model::Config;
@ -32,7 +34,7 @@ fn find_markers_in_dir() -> anyhow::Result<()> {
let issues = HashSet::from_iter(vec![Issue::new(23), Issue::new(43)]);
//when
let markers = find_markers(&config, issues)?;
let markers = find_markers(&config, issues, &DefaultFileScanner)?;
//then
assert_eq!(
@ -53,3 +55,44 @@ fn find_markers_in_dir() -> anyhow::Result<()> {
Ok(())
}
/// `find_markers` must pass only the text file to the scanner and leave the
/// binary file unscanned.
#[test]
fn skips_binary_files() -> Result<()> {
    //given
    let fs = kxio::fs::temp()?;

    // a file with binary (non-text) content, written straight to disk
    let bin_path = fs.base().join("binary_file.bin");
    File::create(bin_path)?.write_all(&[0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])?;

    // a plain text file, written through the kxio filesystem
    let text_path = fs.base().join("text_file.txt");
    fs.file_write(&text_path, "text contents")?;

    let config = a_config(kxio::network::Network::new_mock(), fs)?;
    let recorder = TestFileScanner::default();

    //when
    find_markers(&config, HashSet::new(), &recorder)?;

    //then
    let scanned = recorder.scanned.take();
    assert_eq!(scanned, vec![text_path]);

    Ok(())
}
/// Test double for `FileScanner` that records which paths it was asked to
/// scan instead of reading them.
#[derive(Default)]
struct TestFileScanner {
    // `scan_file` only receives `&self`, so the list needs interior mutability
    scanned: RefCell<Vec<PathBuf>>,
}

impl FileScanner for TestFileScanner {
    /// Appends `path` to `scanned`; the other arguments are ignored.
    fn scan_file(
        &self,
        path: &std::path::Path,
        _config: &Config,
        _markers: &mut model::Markers,
        _issues: &HashSet<Issue>,
    ) -> Result<()> {
        let mut seen = self.scanned.borrow_mut();
        seen.push(PathBuf::from(path));
        Ok(())
    }
}