Add filter to select files to be synced (#24)
* [Filter] added
* [Config] Add filters field
* [ParseArgs] Add '-f'/'--filter' parameters
* [LocalFileStream] apply filters
* [SyncLogging] show filter(s)
* [LocalFileStream] Don't apply filter to directories

  A filter may match a file inside a directory even when it does not match the directory's own path. When the filter was also applied to directories, a directory that failed the filter was never recursed into, so matching files beneath it were never found.
This commit is contained in:
parent
35e85702aa
commit
b9bc7dc957
6 changed files with 68 additions and 1 deletions
|
@ -5,6 +5,7 @@ import java.io.File
|
||||||
case class Config(bucket: Bucket = Bucket(""),
|
case class Config(bucket: Bucket = Bucket(""),
|
||||||
prefix: RemoteKey = RemoteKey(""),
|
prefix: RemoteKey = RemoteKey(""),
|
||||||
verbose: Int = 1,
|
verbose: Int = 1,
|
||||||
|
filters: Seq[Filter] = List(),
|
||||||
excludes: Seq[Exclude] = List(),
|
excludes: Seq[Exclude] = List(),
|
||||||
multiPartThreshold: Long = 1024 * 1024 * 5,
|
multiPartThreshold: Long = 1024 * 1024 * 5,
|
||||||
maxRetries: Int = 3,
|
maxRetries: Int = 3,
|
||||||
|
|
16
src/main/scala/net/kemitix/s3thorp/Filter.scala
Normal file
16
src/main/scala/net/kemitix/s3thorp/Filter.scala
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
package net.kemitix.s3thorp
|
||||||
|
|
||||||
|
import java.nio.file.Path
|
||||||
|
import java.util.function.Predicate
|
||||||
|
import java.util.regex.Pattern
|
||||||
|
|
||||||
|
/**
  * A regular-expression filter used to select which paths are included.
  *
  * A path is included when `filter` is found anywhere within the path's string form
  * (a partial match is enough; the expression does not have to match the whole path).
  * The default expression `.*` is found in every string, so a default `Filter` includes
  * every path.
  *
  * @param filter the regular expression to look for in each path
  */
case class Filter(filter: String = ".*") {

  /**
    * The exclusion test: true for strings in which `filter` is NOT found.
    *
    * Note the `negate` - despite the generic name, this predicate answers "is excluded?".
    * Being a `lazy val`, the expression is compiled on first use, so an invalid expression
    * raises `java.util.regex.PatternSyntaxException` then rather than at construction.
    */
  lazy val predicate: Predicate[String] = Pattern.compile(filter).asPredicate.negate

  /**
    * @param path the path to test
    * @return true when `filter` is found in the path's string form
    */
  def isIncluded(path: Path): Boolean = !isExcluded(path)

  /**
    * @param path the path to test
    * @return true when `filter` is not found in the path's string form
    */
  def isExcluded(path: Path): Boolean = predicate.test(path.toString)

}
|
||||||
|
|
|
@ -10,7 +10,9 @@ trait LocalFileStream
|
||||||
(implicit c: Config): Stream[LocalFile] = {
|
(implicit c: Config): Stream[LocalFile] = {
|
||||||
log5(s"- Entering: $file")
|
log5(s"- Entering: $file")
|
||||||
val files = for {
|
val files = for {
|
||||||
f <- dirPaths(file) filter { f => c.excludes.forall { filter => filter isIncluded f.toPath } }
|
f <- dirPaths(file)
|
||||||
|
.filter { f => f.isDirectory || c.filters.forall { filter => filter isIncluded f.toPath } }
|
||||||
|
.filter { f => c.excludes.forall { exclude => exclude isIncluded f.toPath } }
|
||||||
fs <- recurseIntoSubDirectories(f)
|
fs <- recurseIntoSubDirectories(f)
|
||||||
} yield fs
|
} yield fs
|
||||||
log5(s"- Leaving: $file")
|
log5(s"- Leaving: $file")
|
||||||
|
|
|
@ -25,6 +25,9 @@ object ParseArgs {
|
||||||
opt[String]('p', "prefix")
|
opt[String]('p', "prefix")
|
||||||
.action((str, c) => c.copy(prefix = RemoteKey(str)))
|
.action((str, c) => c.copy(prefix = RemoteKey(str)))
|
||||||
.text("Prefix within the S3 Bucket"),
|
.text("Prefix within the S3 Bucket"),
|
||||||
|
opt[Seq[String]]('f', "filter")
|
||||||
|
.action((str, c) => c.copy(filters = str.map(Filter)))
|
||||||
|
.text("Filter only matching paths"),
|
||||||
opt[Seq[String]]('x', "exclude")
|
opt[Seq[String]]('x', "exclude")
|
||||||
.action((str,c) => c.copy(excludes = str.map(Exclude)))
|
.action((str,c) => c.copy(excludes = str.map(Exclude)))
|
||||||
.text("Exclude matching paths"),
|
.text("Exclude matching paths"),
|
||||||
|
|
|
@ -7,6 +7,7 @@ trait SyncLogging extends Logging {
|
||||||
|
|
||||||
/**
  * Reports the settings for this run through `log1`: bucket name, prefix key, source,
  * and the configured filter and exclude expressions.
  *
  * Every field, including the filter list, is followed by ", " so that the end of the
  * filter list is not run together with the "Exclude:" label.
  *
  * @param c the configuration being reported; it is also passed explicitly to `log1`
  */
def logRunStart(implicit c: Config): Unit = {
  // Built outside the interpolated string: Scala 2.12 does not accept plain "..." literals
  // inside ${...} of a single-quoted interpolator.
  val filters = c.filters.map(_.filter).mkString(", ")
  val excludes = c.excludes.map(_.exclude).mkString(", ")
  log1(s"Bucket: ${c.bucket.name}, Prefix: ${c.prefix.key}, Source: ${c.source}, " +
    s"Filter: $filters, " +
    s"Exclude: $excludes")(c)
}
|
||||||
|
|
||||||
def logFileScan(implicit c: Config): Unit =
|
def logFileScan(implicit c: Config): Unit =
|
||||||
|
|
44
src/test/scala/net/kemitix/s3thorp/FilterSuite.scala
Normal file
44
src/test/scala/net/kemitix/s3thorp/FilterSuite.scala
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
package net.kemitix.s3thorp
|
||||||
|
|
||||||
|
import java.nio.file.{Path, Paths}
|
||||||
|
|
||||||
|
/**
  * Checks how [[Filter]] decides whether a path is included or excluded:
  * the default filter, a directory-style expression, and a partial file-name expression.
  */
class FilterSuite extends UnitTest {

  describe("default filter") {
    // With no expression supplied the filter uses its default, which should accept everything.
    val filter = Filter()
    val paths: List[Path] = List("/a-file", "a-file", "path/to/a/file", "/path/to/a/file",
      "/home/pcampbell/repos/kemitix/s3thorp/target/scala-2.12/test-classes/net/kemitix/s3thorp/upload/subdir"
    ) map { p => Paths.get(p) }
    it("should not exclude files") {
      paths.foreach(path => assertResult(false)(filter.isExcluded(path)))
    }
    it("should include files") {
      paths.foreach(path => assertResult(true)(filter.isIncluded(path)))
    }
  }

  describe("directory exact match include '/upload/subdir/'") {
    val filter = Filter("/upload/subdir/")
    it("include matching directory") {
      // The expression appears inside the file's path, so the file is included.
      val matching = Paths.get("/upload/subdir/leaf-file")
      assertResult(true)(filter.isIncluded(matching))
    }
    it("exclude non-matching files") {
      val nonMatching = Paths.get("/upload/other-file")
      assertResult(true)(filter.isExcluded(nonMatching))
    }
  }

  describe("file partial match 'root'") {
    val filter = Filter("root")
    it("include matching file '/upload/root-file'") {
      // A partial match anywhere in the path is enough to include it.
      val matching = Paths.get("/upload/root-file")
      assertResult(true)(filter.isIncluded(matching))
    }
    it("exclude non-matching files 'test-file-for-hash.txt' & '/upload/subdir/leaf-file'") {
      val nonMatching1 = Paths.get("/test-file-for-hash.txt")
      val nonMatching2 = Paths.get("/upload/subdir/leaf-file")
      assertResult(true)(filter.isExcluded(nonMatching1))
      assertResult(true)(filter.isExcluded(nonMatching2))
    }
  }

}
|
Loading…
Reference in a new issue