From 0fe9b86471f6555dba3f2463c4dc362eab459197 Mon Sep 17 00:00:00 2001 From: Paul Campbell Date: Thu, 23 May 2019 09:21:09 +0100 Subject: [PATCH] Simple Exclusion Filter (#16) * [filter] Parse filter from command line and add to config * [filter] exclude file that match the filter --- CHANGELOG.org | 8 +++- README.org | 1 + .../scala/net/kemitix/s3thorp/Config.scala | 1 + .../scala/net/kemitix/s3thorp/Filter.scala | 15 +++++++ .../net/kemitix/s3thorp/LocalFileStream.scala | 2 +- .../scala/net/kemitix/s3thorp/ParseArgs.scala | 3 ++ .../net/kemitix/s3thorp/FilterSuite.scala | 44 +++++++++++++++++++ .../scala/net/kemitix/s3thorp/SyncSuite.scala | 15 ++++++- 8 files changed, 85 insertions(+), 4 deletions(-) create mode 100644 src/main/scala/net/kemitix/s3thorp/Filter.scala create mode 100644 src/test/scala/net/kemitix/s3thorp/FilterSuite.scala diff --git a/CHANGELOG.org b/CHANGELOG.org index f907497..445fc68 100644 --- a/CHANGELOG.org +++ b/CHANGELOG.org @@ -5,7 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [[https://keepachangelog.com/en/1.0.0/][Keep a Changelog]], and this project adheres to [[https://semver.org/spec/v2.0.0.html][Semantic Versioning]]. -* [Unreleased] +* [0.3.0] - ??? + +** Added + + - Filter to exclude files + +* [0.2.0] - 2019-05-22 ** Added diff --git a/README.org b/README.org index 6f1a137..9f3509b 100644 --- a/README.org +++ b/README.org @@ -17,6 +17,7 @@ hash of the file contents. 
/**
  * An exclusion filter for local paths, expressed as a regular expression.
  *
  * A path is excluded when the pattern is found anywhere within the path's
  * string form, i.e. a partial match is enough.
  *
  * @param filter the regular expression source. The default, `(?!)`, is an
  *               empty negative lookahead, which can never match, so a default
  *               `Filter()` excludes nothing. The earlier default `!.*` matched
  *               a literal `!` followed by anything, and so excluded every path
  *               containing a `!` character.
  */
case class Filter(filter: String = "(?!)") {

  /**
    * The compiled pattern as a predicate over path strings.
    *
    * Being a `lazy val`, the pattern is compiled on first access; a malformed
    * pattern therefore fails with `PatternSyntaxException` at that point and
    * not when the `Filter` is constructed.
    */
  lazy val predicate: Predicate[String] = Pattern.compile(filter).asPredicate()

  /** @return `true` when `path` is not excluded by this filter. */
  def isIncluded(path: Path): Boolean = !isExcluded(path)

  /** @return `true` when the pattern is found somewhere in `path.toString`. */
  def isExcluded(path: Path): Boolean = predicate.test(path.toString)

}
/**
  * Behaviour checks for `Filter`: the default filter, a directory-style
  * pattern and a partial file-name pattern.
  */
class FilterSuite extends UnitTest {

  describe("default filter") {
    val defaultFilter = Filter()
    // A mix of absolute and relative paths; the default filter should keep them all.
    val samples: List[Path] = List(
      "/a-file",
      "a-file",
      "path/to/a/file",
      "/path/to/a/file",
      "/home/pcampbell/repos/kemitix/s3thorp/target/scala-2.12/test-classes/net/kemitix/s3thorp/upload/subdir"
    ).map(raw => Paths.get(raw))
    it("should not exclude files") {
      for (sample <- samples) {
        assertResult(false)(defaultFilter.isExcluded(sample))
      }
    }
    it("should include files") {
      for (sample <- samples) {
        assertResult(true)(defaultFilter.isIncluded(sample))
      }
    }
  }

  describe("directory exact match filter '/upload/subdir/'") {
    val subdirFilter = Filter("/upload/subdir/")
    it("exclude matching directory") {
      assertResult(true)(subdirFilter.isExcluded(Paths.get("/upload/subdir/leaf-file")))
    }
    it("include non-matching files") {
      assertResult(true)(subdirFilter.isIncluded(Paths.get("/upload/other-file")))
    }
  }

  describe("file partial match 'root'") {
    val rootFilter = Filter("root")
    it("exclude matching file '/upload/root-file") {
      assertResult(true)(rootFilter.isExcluded(Paths.get("/upload/root-file")))
    }
    it("include non-matching files 'test-file-for-hash.txt' & '/upload/subdir/leaf-file'") {
      // Neither path contains "root", so both must pass through the filter.
      val hashFile = Paths.get("/test-file-for-hash.txt")
      val leafFile = Paths.get("/upload/subdir/leaf-file")
      assertResult(true)(rootFilter.isIncluded(hashFile))
      assertResult(true)(rootFilter.isIncluded(leafFile))
    }
  }

}
s3ObjectsData: S3ObjectsData)