From 44c66c042cc591cc51976089a4b0322a565de60d Mon Sep 17 00:00:00 2001 From: Paul Campbell Date: Sat, 8 Jun 2019 20:31:20 +0100 Subject: [PATCH] Include and Exclude behave more like the AWS CLI (#48) * [domain] rename Filter as Include * [cli]ParseArgs allow exclude and include parameters to be repeated * [core] don't include include/exclude details in logging * [domain] combine Include and Exclude into Filter Config now collect includes and Excludes into a single list and passed each file to the Filter.isIncluded method, with the list of Filters, to determine if a file should be included. --- .../net/kemitix/s3thorp/cli/ParseArgs.scala | 13 +- .../LocalFileStream.scala | 16 +-- .../SyncLogging.scala | 4 +- .../net/kemitix/s3thorp/core/SyncSuite.scala | 3 +- .../net/kemitix/s3thorp/domain/Config.scala | 15 +- .../net/kemitix/s3thorp/domain/Exclude.scala | 15 -- .../net/kemitix/s3thorp/domain/Filter.scala | 44 +++++- .../kemitix/s3thorp/domain/ExcludeSuite.scala | 46 ------ .../kemitix/s3thorp/domain/FilterSuite.scala | 46 ------ .../kemitix/s3thorp/domain/FiltersSuite.scala | 132 ++++++++++++++++++ 10 files changed, 193 insertions(+), 141 deletions(-) delete mode 100644 domain/src/main/scala/net/kemitix/s3thorp/domain/Exclude.scala delete mode 100644 domain/src/test/scala/net/kemitix/s3thorp/domain/ExcludeSuite.scala delete mode 100644 domain/src/test/scala/net/kemitix/s3thorp/domain/FilterSuite.scala create mode 100644 domain/src/test/scala/net/kemitix/s3thorp/domain/FiltersSuite.scala diff --git a/cli/src/main/scala/net/kemitix/s3thorp/cli/ParseArgs.scala b/cli/src/main/scala/net/kemitix/s3thorp/cli/ParseArgs.scala index 14f472e..77f7129 100644 --- a/cli/src/main/scala/net/kemitix/s3thorp/cli/ParseArgs.scala +++ b/cli/src/main/scala/net/kemitix/s3thorp/cli/ParseArgs.scala @@ -4,7 +4,8 @@ import java.nio.file.Paths import cats.effect.IO import net.kemitix.s3thorp._ -import net.kemitix.s3thorp.domain.{Bucket, Config, Exclude, Filter, RemoteKey} +import net.kemitix.s3thorp.domain.Filter.{Exclude, Include} +import net.kemitix.s3thorp.domain.{Bucket, Config, RemoteKey} import scopt.OParser import scopt.OParser.{builder, parse, sequence} @@ -27,11 +28,13 @@ object ParseArgs { opt[String]('p', "prefix") .action((str, c) => c.copy(prefix = RemoteKey(str))) .text("Prefix within the S3 Bucket"), - opt[Seq[String]]('f', "filter") - .action((str, c) => c.copy(filters = str.map(Filter))) - .text("Filter only matching paths"), + opt[Seq[String]]('i', "include") + .unbounded() + .action((str, c) => c.copy(filters = c.filters ++ str.map(Include))) + .text("Include only matching paths"), opt[Seq[String]]('x', "exclude") - .action((str,c) => c.copy(excludes = str.map(Exclude))) + .unbounded() + .action((str,c) => c.copy(filters = c.filters ++ str.map(Exclude))) .text("Exclude matching paths"), opt[Int]('v', "verbose") .validate(i => diff --git a/core/src/main/scala/net.kemitix.s3thorp.core/LocalFileStream.scala b/core/src/main/scala/net.kemitix.s3thorp.core/LocalFileStream.scala index 1b108e1..e931ac7 100644 --- a/core/src/main/scala/net.kemitix.s3thorp.core/LocalFileStream.scala +++ b/core/src/main/scala/net.kemitix.s3thorp.core/LocalFileStream.scala @@ -1,10 +1,11 @@ package net.kemitix.s3thorp.core import java.io.File +import java.nio.file.Path import cats.effect.IO import net.kemitix.s3thorp.core.KeyGenerator.generateKey -import net.kemitix.s3thorp.domain.{Config, LocalFile, MD5Hash} +import net.kemitix.s3thorp.domain.{Config, Filter, LocalFile, MD5Hash} object LocalFileStream { @@ -13,6 +14,8 @@ object LocalFileStream { info: Int => String => Unit) (implicit c: Config): IO[Stream[LocalFile]] = { + val filters: Path => Boolean = Filter.isIncluded(c.filters) + def loop(file: File): IO[Stream[LocalFile]] = { def dirPaths(file: File): IO[Stream[File]] = @@ -22,7 +25,7 @@ object LocalFileStream { } .map(fs => Stream(fs: _*) - .filter(isIncluded)) + .filter(f => filters(f.toPath))) def recurseIntoSubDirectories(file: File)(implicit c: Config): IO[Stream[LocalFile]] = file match { @@ -31,15 +34,6 @@ object LocalFileStream { yield Stream(LocalFile(file, c.source, hash, generateKey(c.source, c.prefix))) } - def filterIsIncluded(f: File): Boolean = - f.isDirectory || c.filters.forall { filter => filter isIncluded f.toPath } - - def excludeIsIncluded(f: File): Boolean = - c.excludes.forall { exclude => exclude isIncluded f.toPath } - - def isIncluded(f: File): Boolean = - filterIsIncluded(f) && excludeIsIncluded(f) - def recurse(fs: Stream[File]): IO[Stream[LocalFile]] = fs.foldLeft(IO.pure(Stream.empty[LocalFile]))((acc, f) => recurseIntoSubDirectories(f) diff --git a/core/src/main/scala/net.kemitix.s3thorp.core/SyncLogging.scala b/core/src/main/scala/net.kemitix.s3thorp.core/SyncLogging.scala index a0838fc..1b1137b 100644 --- a/core/src/main/scala/net.kemitix.s3thorp.core/SyncLogging.scala +++ b/core/src/main/scala/net.kemitix.s3thorp.core/SyncLogging.scala @@ -9,9 +9,7 @@ import net.kemitix.s3thorp.domain.Config object SyncLogging { def logRunStart[F[_]](info: Int => String => Unit)(implicit c: Config): IO[Unit] = IO { - info(1)(s"Bucket: ${c.bucket.name}, Prefix: ${c.prefix.key}, Source: ${c.source}, " + - s"Filter: ${c.filters.map{ f => f.filter}.mkString(""", """)} " + - s"Exclude: ${c.excludes.map{ f => f.exclude}.mkString(""", """)}")} + info(1)(s"Bucket: ${c.bucket.name}, Prefix: ${c.prefix.key}, Source: ${c.source}, ")} def logFileScan(info: Int => String => Unit)(implicit c: Config): IO[Unit] = IO{ info(1)(s"Scanning local files: ${c.source}...")} diff --git a/core/src/test/scala/net/kemitix/s3thorp/core/SyncSuite.scala b/core/src/test/scala/net/kemitix/s3thorp/core/SyncSuite.scala index 869aef4..34cb296 100644 --- a/core/src/test/scala/net/kemitix/s3thorp/core/SyncSuite.scala +++ b/core/src/test/scala/net/kemitix/s3thorp/core/SyncSuite.scala @@ -7,6 +7,7 @@ import cats.effect.IO import net.kemitix.s3thorp.aws.api.S3Action.{CopyS3Action, DeleteS3Action, UploadS3Action} import net.kemitix.s3thorp.aws.api.{S3Client, UploadProgressListener} import net.kemitix.s3thorp.core.MD5HashData.{leafHash, rootHash} +import net.kemitix.s3thorp.domain.Filter.Exclude import net.kemitix.s3thorp.domain._ import org.scalatest.FunSpec @@ -125,7 +126,7 @@ class SyncSuite } } describe("when a file is excluded") { - val configWithExclusion = config.copy(excludes = List(Exclude("leaf"))) + val configWithExclusion = config.copy(filters = List(Exclude("leaf"))) val s3ObjectsData = S3ObjectsData(Map(), Map()) val s3Client = new RecordingClient(testBucket, s3ObjectsData) Sync.run(s3Client, md5HashGenerator, logInfo, logWarn, logError)(configWithExclusion).unsafeRunSync diff --git a/domain/src/main/scala/net/kemitix/s3thorp/domain/Config.scala b/domain/src/main/scala/net/kemitix/s3thorp/domain/Config.scala index 6ebb9be..7081f4a 100644 --- a/domain/src/main/scala/net/kemitix/s3thorp/domain/Config.scala +++ b/domain/src/main/scala/net/kemitix/s3thorp/domain/Config.scala @@ -3,14 +3,13 @@ package net.kemitix.s3thorp.domain import java.io.File final case class Config( - bucket: Bucket = Bucket(""), - prefix: RemoteKey = RemoteKey(""), - verbose: Int = 1, - filters: Seq[Filter] = List(), - excludes: Seq[Exclude] = List(), - multiPartThreshold: Long = 1024 * 1024 * 5, - maxRetries: Int = 3, - source: File + bucket: Bucket = Bucket(""), + prefix: RemoteKey = RemoteKey(""), + verbose: Int = 1, + filters: List[Filter] = List(), + multiPartThreshold: Long = 1024 * 1024 * 5, + maxRetries: Int = 3, + source: File ) { require(source.isDirectory, s"Source must be a directory: $source") require(multiPartThreshold >= 1024 * 1024 * 5, s"Threshold for multi-part upload is 5Mb: '$multiPartThreshold'") diff --git a/domain/src/main/scala/net/kemitix/s3thorp/domain/Exclude.scala b/domain/src/main/scala/net/kemitix/s3thorp/domain/Exclude.scala deleted file mode 100644 index 88a1cd3..0000000 --- a/domain/src/main/scala/net/kemitix/s3thorp/domain/Exclude.scala +++ /dev/null @@ -1,15 +0,0 @@ -package net.kemitix.s3thorp.domain - -import java.nio.file.Path -import java.util.function.Predicate -import java.util.regex.Pattern - -final case class Exclude(exclude: String = "!.*") { - - lazy val predicate: Predicate[String] = Pattern.compile(exclude).asPredicate() - - def isIncluded(path: Path): Boolean = !isExcluded(path) - - def isExcluded(path: Path): Boolean = predicate.test(path.toString) - -} diff --git a/domain/src/main/scala/net/kemitix/s3thorp/domain/Filter.scala b/domain/src/main/scala/net/kemitix/s3thorp/domain/Filter.scala index 9add298..da87200 100644 --- a/domain/src/main/scala/net/kemitix/s3thorp/domain/Filter.scala +++ b/domain/src/main/scala/net/kemitix/s3thorp/domain/Filter.scala @@ -1,16 +1,48 @@ package net.kemitix.s3thorp.domain import java.nio.file.Path -import java.util.function.Predicate import java.util.regex.Pattern -final case class Filter(filter: String = ".*") { +sealed trait Filter - lazy val predicate: Predicate[String] = Pattern.compile(filter).asPredicate.negate +object Filter { - def isIncluded(path: Path): Boolean = !isExcluded(path) + case class Include(include: String = ".*") extends Filter { - def isExcluded(path: Path): Boolean = predicate.test(path.toString) + private lazy val predicate = Pattern.compile(include).asPredicate + + def isIncluded(path: Path): Boolean = predicate.test(path.toString) + + } + + case class Exclude(exclude: String) extends Filter { + + private lazy val predicate = Pattern.compile(exclude).asPredicate() + + def isExcluded(path: Path): Boolean = predicate.test(path.toString) + + } + + def isIncluded(filters: List[Filter])(p: Path): Boolean = { + sealed trait State + case class Unknown() extends State + case class Accepted() extends State + case class Discarded() extends State + filters.foldRight(Unknown(): State)((filter, state) => + (filter, state) match { + case (_, Accepted()) => Accepted() + case (_, Discarded()) => Discarded() + case (x: Exclude, _) if x.isExcluded(p) => Discarded() + case (i: Include, _) if i.isIncluded(p) => Accepted() + case _ => Unknown() + }) match { + case Accepted() => true + case Discarded() => false + case Unknown() => filters.forall { + case _: Include => false + case _ => true + } + } + } } - diff --git a/domain/src/test/scala/net/kemitix/s3thorp/domain/ExcludeSuite.scala b/domain/src/test/scala/net/kemitix/s3thorp/domain/ExcludeSuite.scala deleted file mode 100644 index 49cbf58..0000000 --- a/domain/src/test/scala/net/kemitix/s3thorp/domain/ExcludeSuite.scala +++ /dev/null @@ -1,46 +0,0 @@ -package net.kemitix.s3thorp.domain - -import java.nio.file.{Path, Paths} - -import org.scalatest.FunSpec - -class ExcludeSuite extends FunSpec { - - describe("default exclude") { - val exclude = Exclude() - val paths: List[Path] = List("/a-file", "a-file", "path/to/a/file", "/path/to/a/file", - "/home/pcampbell/repos/kemitix/s3thorp/target/scala-2.12/test-classes/net/kemitix/s3thorp/upload/subdir" - ) map { p => Paths.get(p)} - it("should not exclude files") { - paths.foreach(path => { assertResult(false)(exclude.isExcluded(path)) }) - } - it("should include files") { - paths.foreach(path => assertResult(true)(exclude.isIncluded(path))) - } - } - describe("directory exact match exclude '/upload/subdir/'") { - val exclude = Exclude("/upload/subdir/") - it("exclude matching directory") { - val matching = Paths.get("/upload/subdir/leaf-file") - assertResult(true)(exclude.isExcluded(matching)) - } - it("include non-matching files") { - val nonMatching = Paths.get("/upload/other-file") - assertResult(true)(exclude.isIncluded(nonMatching)) - } - } - describe("file partial match 'root'") { - val exclude = Exclude("root") - it("exclude matching file '/upload/root-file") { - val matching = Paths.get("/upload/root-file") - assertResult(true)(exclude.isExcluded(matching)) - } - it("include non-matching files 'test-file-for-hash.txt' & '/upload/subdir/leaf-file'") { - val nonMatching1 = Paths.get("/test-file-for-hash.txt") - val nonMatching2 = Paths.get("/upload/subdir/leaf-file") - assertResult(true)(exclude.isIncluded(nonMatching1)) - assertResult(true)(exclude.isIncluded(nonMatching2)) - } - } - -} diff --git a/domain/src/test/scala/net/kemitix/s3thorp/domain/FilterSuite.scala b/domain/src/test/scala/net/kemitix/s3thorp/domain/FilterSuite.scala deleted file mode 100644 index 228e2e0..0000000 --- a/domain/src/test/scala/net/kemitix/s3thorp/domain/FilterSuite.scala +++ /dev/null @@ -1,46 +0,0 @@ -package net.kemitix.s3thorp.domain - -import java.nio.file.{Path, Paths} - -import org.scalatest.FunSpec - -class FilterSuite extends FunSpec { - - describe("default filter") { - val filter = Filter() - val paths: List[Path] = List("/a-file", "a-file", "path/to/a/file", "/path/to/a/file", - "/home/pcampbell/repos/kemitix/s3thorp/target/scala-2.12/test-classes/net/kemitix/s3thorp/upload/subdir" - ) map { p => Paths.get(p)} - it("should not exclude files") { - paths.foreach(path => { assertResult(false)(filter.isExcluded(path)) }) - } - it("should include files") { - paths.foreach(path => assertResult(true)(filter.isIncluded(path))) - } - } - describe("directory exact match include '/upload/subdir/'") { - val filter = Filter("/upload/subdir/") - it("include matching directory") { - val matching = Paths.get("/upload/subdir/leaf-file") - assertResult(true)(filter.isIncluded(matching)) - } - it("exclude non-matching files") { - val nonMatching = Paths.get("/upload/other-file") - assertResult(true)(filter.isExcluded(nonMatching)) - } - } - describe("file partial match 'root'") { - val filter = Filter("root") - it("include matching file '/upload/root-file") { - val matching = Paths.get("/upload/root-file") - assertResult(true)(filter.isIncluded(matching)) - } - it("exclude non-matching files 'test-file-for-hash.txt' & '/upload/subdir/leaf-file'") { - val nonMatching1 = Paths.get("/test-file-for-hash.txt") - val nonMatching2 = Paths.get("/upload/subdir/leaf-file") - assertResult(true)(filter.isExcluded(nonMatching1)) - assertResult(true)(filter.isExcluded(nonMatching2)) - } - } - -} diff --git a/domain/src/test/scala/net/kemitix/s3thorp/domain/FiltersSuite.scala b/domain/src/test/scala/net/kemitix/s3thorp/domain/FiltersSuite.scala new file mode 100644 index 0000000..264b43a --- /dev/null +++ b/domain/src/test/scala/net/kemitix/s3thorp/domain/FiltersSuite.scala @@ -0,0 +1,132 @@ +package net.kemitix.s3thorp.domain + +import java.nio.file.{Path, Paths} + +import net.kemitix.s3thorp.domain.Filter.{Exclude, Include} +import org.scalatest.FunSpec + +class FiltersSuite extends FunSpec { + + private val path1 = "a-file" + private val path2 = "another-file.txt" + private val path3 = "/path/to/a/file.txt" + private val path4 = "/path/to/another/file" + private val path5 = "/home/pcampbell/repos/kemitix/s3thorp" + private val path6 = "/kemitix/s3thorp/upload/subdir" + private val paths = List(path1, path2, path3, path4, path5, path6).map(Paths.get(_)) + + describe("Include") { + + describe("default filter") { + val include = Include() + it("should include files") { + paths.foreach(path => assertResult(true)(include.isIncluded(path))) + } + } + describe("directory exact match include '/upload/subdir/'") { + val include = Include("/upload/subdir/") + it("include matching directory") { + val matching = Paths.get("/upload/subdir/leaf-file") + assertResult(true)(include.isIncluded(matching)) + } + it("exclude non-matching files") { + val nonMatching = Paths.get("/upload/other-file") + assertResult(false)(include.isIncluded(nonMatching)) + } + } + describe("file partial match 'root'") { + val include = Include("root") + it("include matching file '/upload/root-file") { + val matching = Paths.get("/upload/root-file") + assertResult(true)(include.isIncluded(matching)) + } + it("exclude non-matching files 'test-file-for-hash.txt' & '/upload/subdir/leaf-file'") { + val nonMatching1 = Paths.get("/test-file-for-hash.txt") + val nonMatching2 = Paths.get("/upload/subdir/leaf-file") + assertResult(false)(include.isIncluded(nonMatching1)) + assertResult(false)(include.isIncluded(nonMatching2)) + } + } + } + + describe("Exclude") { +// describe("default exclude") { +// val exclude = Exclude() +// it("should exclude all files") { +// paths.foreach(path => { +// assertResult(true)(exclude.isExcluded(path)) +// }) +// } +// } + describe("directory exact match exclude '/upload/subdir/'") { + val exclude = Exclude("/upload/subdir/") + it("exclude matching directory") { + val matching = Paths.get("/upload/subdir/leaf-file") + assertResult(true)(exclude.isExcluded(matching)) + } + it("include non-matching files") { + val nonMatching = Paths.get("/upload/other-file") + assertResult(false)(exclude.isExcluded(nonMatching)) + } + } + describe("file partial match 'root'") { + val exclude = Exclude("root") + it("exclude matching file '/upload/root-file") { + val matching = Paths.get("/upload/root-file") + assertResult(true)(exclude.isExcluded(matching)) + } + it("include non-matching files 'test-file-for-hash.txt' & '/upload/subdir/leaf-file'") { + val nonMatching1 = Paths.get("/test-file-for-hash.txt") + val nonMatching2 = Paths.get("/upload/subdir/leaf-file") + assertResult(false)(exclude.isExcluded(nonMatching1)) + assertResult(false)(exclude.isExcluded(nonMatching2)) + } + } + } + describe("isIncluded") { + def invoke(filters: List[Filter]) = { + paths.filter(path => Filter.isIncluded(filters)(path)) + } + + describe("when there are no filters") { + val filters = List[Filter]() + it("should accept all files") { + val expected = paths + val result = invoke(filters) + assertResult(expected)(result) + } + } + describe("when a single include") { + val filters = List(Include(".txt")) + it("should only include two matching paths") { + val expected = List(path2, path3).map(Paths.get(_)) + val result = invoke(filters) + assertResult(expected)(result) + } + } + describe("when a single exclude") { + val filters = List(Exclude("path")) + it("should include only other paths") { + val expected = List(path1, path2, path5, path6).map(Paths.get(_)) + val result = invoke(filters) + assertResult(expected)(result) + } + } + describe("when include .txt files, but then exclude everything trumps all") { + val filters = List(Include(".txt"), Exclude(".*")) + it("should include nothing") { + val expected = List() + val result = invoke(filters) + assertResult(expected)(result) + } + } + describe("when exclude everything except .txt files") { + val filters = List(Exclude(".*"), Include(".txt")) + it("should include only the .txt files") { + val expected = List(path2, path3).map(Paths.get(_)) + val result = invoke(filters) + assertResult(expected)(result) + } + } + } +}