Include and Exclude behave more like the AWS CLI (#48)

* [domain] rename Filter as Include

* [cli]ParseArgs allow exclude and include parameters to be repeated

* [core] don't include include/exclude details in logging

* [domain] combine Include and Exclude into Filter

Config now collect includes and Excludes into a single list and passed
each file to the Filter.isIncluded method, with the list of Filters,
to determine if a file should be included.
This commit is contained in:
Paul Campbell 2019-06-08 20:31:20 +01:00 committed by GitHub
parent 7ffa386b29
commit 44c66c042c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 193 additions and 141 deletions

View file

@ -4,7 +4,8 @@ import java.nio.file.Paths
import cats.effect.IO import cats.effect.IO
import net.kemitix.s3thorp._ import net.kemitix.s3thorp._
import net.kemitix.s3thorp.domain.{Bucket, Config, Exclude, Filter, RemoteKey} import net.kemitix.s3thorp.domain.Filter.{Exclude, Include}
import net.kemitix.s3thorp.domain.{Bucket, Config, RemoteKey}
import scopt.OParser import scopt.OParser
import scopt.OParser.{builder, parse, sequence} import scopt.OParser.{builder, parse, sequence}
@ -27,11 +28,13 @@ object ParseArgs {
opt[String]('p', "prefix") opt[String]('p', "prefix")
.action((str, c) => c.copy(prefix = RemoteKey(str))) .action((str, c) => c.copy(prefix = RemoteKey(str)))
.text("Prefix within the S3 Bucket"), .text("Prefix within the S3 Bucket"),
opt[Seq[String]]('f', "filter") opt[Seq[String]]('i', "include")
.action((str, c) => c.copy(filters = str.map(Filter))) .unbounded()
.text("Filter only matching paths"), .action((str, c) => c.copy(filters = c.filters ++ str.map(Include)))
.text("Include only matching paths"),
opt[Seq[String]]('x', "exclude") opt[Seq[String]]('x', "exclude")
.action((str,c) => c.copy(excludes = str.map(Exclude))) .unbounded()
.action((str,c) => c.copy(filters = c.filters ++ str.map(Exclude)))
.text("Exclude matching paths"), .text("Exclude matching paths"),
opt[Int]('v', "verbose") opt[Int]('v', "verbose")
.validate(i => .validate(i =>

View file

@ -1,10 +1,11 @@
package net.kemitix.s3thorp.core package net.kemitix.s3thorp.core
import java.io.File import java.io.File
import java.nio.file.Path
import cats.effect.IO import cats.effect.IO
import net.kemitix.s3thorp.core.KeyGenerator.generateKey import net.kemitix.s3thorp.core.KeyGenerator.generateKey
import net.kemitix.s3thorp.domain.{Config, LocalFile, MD5Hash} import net.kemitix.s3thorp.domain.{Config, Filter, LocalFile, MD5Hash}
object LocalFileStream { object LocalFileStream {
@ -13,6 +14,8 @@ object LocalFileStream {
info: Int => String => Unit) info: Int => String => Unit)
(implicit c: Config): IO[Stream[LocalFile]] = { (implicit c: Config): IO[Stream[LocalFile]] = {
val filters: Path => Boolean = Filter.isIncluded(c.filters)
def loop(file: File): IO[Stream[LocalFile]] = { def loop(file: File): IO[Stream[LocalFile]] = {
def dirPaths(file: File): IO[Stream[File]] = def dirPaths(file: File): IO[Stream[File]] =
@ -22,7 +25,7 @@ object LocalFileStream {
} }
.map(fs => .map(fs =>
Stream(fs: _*) Stream(fs: _*)
.filter(isIncluded)) .filter(f => filters(f.toPath)))
def recurseIntoSubDirectories(file: File)(implicit c: Config): IO[Stream[LocalFile]] = def recurseIntoSubDirectories(file: File)(implicit c: Config): IO[Stream[LocalFile]] =
file match { file match {
@ -31,15 +34,6 @@ object LocalFileStream {
yield Stream(LocalFile(file, c.source, hash, generateKey(c.source, c.prefix))) yield Stream(LocalFile(file, c.source, hash, generateKey(c.source, c.prefix)))
} }
def filterIsIncluded(f: File): Boolean =
f.isDirectory || c.filters.forall { filter => filter isIncluded f.toPath }
def excludeIsIncluded(f: File): Boolean =
c.excludes.forall { exclude => exclude isIncluded f.toPath }
def isIncluded(f: File): Boolean =
filterIsIncluded(f) && excludeIsIncluded(f)
def recurse(fs: Stream[File]): IO[Stream[LocalFile]] = def recurse(fs: Stream[File]): IO[Stream[LocalFile]] =
fs.foldLeft(IO.pure(Stream.empty[LocalFile]))((acc, f) => fs.foldLeft(IO.pure(Stream.empty[LocalFile]))((acc, f) =>
recurseIntoSubDirectories(f) recurseIntoSubDirectories(f)

View file

@ -9,9 +9,7 @@ import net.kemitix.s3thorp.domain.Config
object SyncLogging { object SyncLogging {
def logRunStart[F[_]](info: Int => String => Unit)(implicit c: Config): IO[Unit] = IO { def logRunStart[F[_]](info: Int => String => Unit)(implicit c: Config): IO[Unit] = IO {
info(1)(s"Bucket: ${c.bucket.name}, Prefix: ${c.prefix.key}, Source: ${c.source}, " + info(1)(s"Bucket: ${c.bucket.name}, Prefix: ${c.prefix.key}, Source: ${c.source}, ")}
s"Filter: ${c.filters.map{ f => f.filter}.mkString(""", """)} " +
s"Exclude: ${c.excludes.map{ f => f.exclude}.mkString(""", """)}")}
def logFileScan(info: Int => String => Unit)(implicit c: Config): IO[Unit] = IO{ def logFileScan(info: Int => String => Unit)(implicit c: Config): IO[Unit] = IO{
info(1)(s"Scanning local files: ${c.source}...")} info(1)(s"Scanning local files: ${c.source}...")}

View file

@ -7,6 +7,7 @@ import cats.effect.IO
import net.kemitix.s3thorp.aws.api.S3Action.{CopyS3Action, DeleteS3Action, UploadS3Action} import net.kemitix.s3thorp.aws.api.S3Action.{CopyS3Action, DeleteS3Action, UploadS3Action}
import net.kemitix.s3thorp.aws.api.{S3Client, UploadProgressListener} import net.kemitix.s3thorp.aws.api.{S3Client, UploadProgressListener}
import net.kemitix.s3thorp.core.MD5HashData.{leafHash, rootHash} import net.kemitix.s3thorp.core.MD5HashData.{leafHash, rootHash}
import net.kemitix.s3thorp.domain.Filter.Exclude
import net.kemitix.s3thorp.domain._ import net.kemitix.s3thorp.domain._
import org.scalatest.FunSpec import org.scalatest.FunSpec
@ -125,7 +126,7 @@ class SyncSuite
} }
} }
describe("when a file is excluded") { describe("when a file is excluded") {
val configWithExclusion = config.copy(excludes = List(Exclude("leaf"))) val configWithExclusion = config.copy(filters = List(Exclude("leaf")))
val s3ObjectsData = S3ObjectsData(Map(), Map()) val s3ObjectsData = S3ObjectsData(Map(), Map())
val s3Client = new RecordingClient(testBucket, s3ObjectsData) val s3Client = new RecordingClient(testBucket, s3ObjectsData)
Sync.run(s3Client, md5HashGenerator, logInfo, logWarn, logError)(configWithExclusion).unsafeRunSync Sync.run(s3Client, md5HashGenerator, logInfo, logWarn, logError)(configWithExclusion).unsafeRunSync

View file

@ -6,8 +6,7 @@ final case class Config(
bucket: Bucket = Bucket(""), bucket: Bucket = Bucket(""),
prefix: RemoteKey = RemoteKey(""), prefix: RemoteKey = RemoteKey(""),
verbose: Int = 1, verbose: Int = 1,
filters: Seq[Filter] = List(), filters: List[Filter] = List(),
excludes: Seq[Exclude] = List(),
multiPartThreshold: Long = 1024 * 1024 * 5, multiPartThreshold: Long = 1024 * 1024 * 5,
maxRetries: Int = 3, maxRetries: Int = 3,
source: File source: File

View file

@ -1,15 +0,0 @@
package net.kemitix.s3thorp.domain
import java.nio.file.Path
import java.util.function.Predicate
import java.util.regex.Pattern
final case class Exclude(exclude: String = "!.*") {
lazy val predicate: Predicate[String] = Pattern.compile(exclude).asPredicate()
def isIncluded(path: Path): Boolean = !isExcluded(path)
def isExcluded(path: Path): Boolean = predicate.test(path.toString)
}

View file

@ -1,16 +1,48 @@
package net.kemitix.s3thorp.domain package net.kemitix.s3thorp.domain
import java.nio.file.Path import java.nio.file.Path
import java.util.function.Predicate
import java.util.regex.Pattern import java.util.regex.Pattern
final case class Filter(filter: String = ".*") { sealed trait Filter
lazy val predicate: Predicate[String] = Pattern.compile(filter).asPredicate.negate object Filter {
def isIncluded(path: Path): Boolean = !isExcluded(path) case class Include(include: String = ".*") extends Filter {
private lazy val predicate = Pattern.compile(include).asPredicate
def isIncluded(path: Path): Boolean = predicate.test(path.toString)
}
case class Exclude(exclude: String) extends Filter {
private lazy val predicate = Pattern.compile(exclude).asPredicate()
def isExcluded(path: Path): Boolean = predicate.test(path.toString) def isExcluded(path: Path): Boolean = predicate.test(path.toString)
} }
def isIncluded(filters: List[Filter])(p: Path): Boolean = {
sealed trait State
case class Unknown() extends State
case class Accepted() extends State
case class Discarded() extends State
filters.foldRight(Unknown(): State)((filter, state) =>
(filter, state) match {
case (_, Accepted()) => Accepted()
case (_, Discarded()) => Discarded()
case (x: Exclude, _) if x.isExcluded(p) => Discarded()
case (i: Include, _) if i.isIncluded(p) => Accepted()
case _ => Unknown()
}) match {
case Accepted() => true
case Discarded() => false
case Unknown() => filters.forall {
case _: Include => false
case _ => true
}
}
}
}

View file

@ -1,46 +0,0 @@
package net.kemitix.s3thorp.domain
import java.nio.file.{Path, Paths}
import org.scalatest.FunSpec
class ExcludeSuite extends FunSpec {
describe("default exclude") {
val exclude = Exclude()
val paths: List[Path] = List("/a-file", "a-file", "path/to/a/file", "/path/to/a/file",
"/home/pcampbell/repos/kemitix/s3thorp/target/scala-2.12/test-classes/net/kemitix/s3thorp/upload/subdir"
) map { p => Paths.get(p)}
it("should not exclude files") {
paths.foreach(path => { assertResult(false)(exclude.isExcluded(path)) })
}
it("should include files") {
paths.foreach(path => assertResult(true)(exclude.isIncluded(path)))
}
}
describe("directory exact match exclude '/upload/subdir/'") {
val exclude = Exclude("/upload/subdir/")
it("exclude matching directory") {
val matching = Paths.get("/upload/subdir/leaf-file")
assertResult(true)(exclude.isExcluded(matching))
}
it("include non-matching files") {
val nonMatching = Paths.get("/upload/other-file")
assertResult(true)(exclude.isIncluded(nonMatching))
}
}
describe("file partial match 'root'") {
val exclude = Exclude("root")
it("exclude matching file '/upload/root-file") {
val matching = Paths.get("/upload/root-file")
assertResult(true)(exclude.isExcluded(matching))
}
it("include non-matching files 'test-file-for-hash.txt' & '/upload/subdir/leaf-file'") {
val nonMatching1 = Paths.get("/test-file-for-hash.txt")
val nonMatching2 = Paths.get("/upload/subdir/leaf-file")
assertResult(true)(exclude.isIncluded(nonMatching1))
assertResult(true)(exclude.isIncluded(nonMatching2))
}
}
}

View file

@ -1,46 +0,0 @@
package net.kemitix.s3thorp.domain
import java.nio.file.{Path, Paths}
import org.scalatest.FunSpec
class FilterSuite extends FunSpec {
describe("default filter") {
val filter = Filter()
val paths: List[Path] = List("/a-file", "a-file", "path/to/a/file", "/path/to/a/file",
"/home/pcampbell/repos/kemitix/s3thorp/target/scala-2.12/test-classes/net/kemitix/s3thorp/upload/subdir"
) map { p => Paths.get(p)}
it("should not exclude files") {
paths.foreach(path => { assertResult(false)(filter.isExcluded(path)) })
}
it("should include files") {
paths.foreach(path => assertResult(true)(filter.isIncluded(path)))
}
}
describe("directory exact match include '/upload/subdir/'") {
val filter = Filter("/upload/subdir/")
it("include matching directory") {
val matching = Paths.get("/upload/subdir/leaf-file")
assertResult(true)(filter.isIncluded(matching))
}
it("exclude non-matching files") {
val nonMatching = Paths.get("/upload/other-file")
assertResult(true)(filter.isExcluded(nonMatching))
}
}
describe("file partial match 'root'") {
val filter = Filter("root")
it("include matching file '/upload/root-file") {
val matching = Paths.get("/upload/root-file")
assertResult(true)(filter.isIncluded(matching))
}
it("exclude non-matching files 'test-file-for-hash.txt' & '/upload/subdir/leaf-file'") {
val nonMatching1 = Paths.get("/test-file-for-hash.txt")
val nonMatching2 = Paths.get("/upload/subdir/leaf-file")
assertResult(true)(filter.isExcluded(nonMatching1))
assertResult(true)(filter.isExcluded(nonMatching2))
}
}
}

View file

@ -0,0 +1,132 @@
package net.kemitix.s3thorp.domain
import java.nio.file.{Path, Paths}
import net.kemitix.s3thorp.domain.Filter.{Exclude, Include}
import org.scalatest.FunSpec
class FiltersSuite extends FunSpec {
private val path1 = "a-file"
private val path2 = "another-file.txt"
private val path3 = "/path/to/a/file.txt"
private val path4 = "/path/to/another/file"
private val path5 = "/home/pcampbell/repos/kemitix/s3thorp"
private val path6 = "/kemitix/s3thorp/upload/subdir"
private val paths = List(path1, path2, path3, path4, path5, path6).map(Paths.get(_))
describe("Include") {
describe("default filter") {
val include = Include()
it("should include files") {
paths.foreach(path => assertResult(true)(include.isIncluded(path)))
}
}
describe("directory exact match include '/upload/subdir/'") {
val include = Include("/upload/subdir/")
it("include matching directory") {
val matching = Paths.get("/upload/subdir/leaf-file")
assertResult(true)(include.isIncluded(matching))
}
it("exclude non-matching files") {
val nonMatching = Paths.get("/upload/other-file")
assertResult(false)(include.isIncluded(nonMatching))
}
}
describe("file partial match 'root'") {
val include = Include("root")
it("include matching file '/upload/root-file") {
val matching = Paths.get("/upload/root-file")
assertResult(true)(include.isIncluded(matching))
}
it("exclude non-matching files 'test-file-for-hash.txt' & '/upload/subdir/leaf-file'") {
val nonMatching1 = Paths.get("/test-file-for-hash.txt")
val nonMatching2 = Paths.get("/upload/subdir/leaf-file")
assertResult(false)(include.isIncluded(nonMatching1))
assertResult(false)(include.isIncluded(nonMatching2))
}
}
}
describe("Exclude") {
// describe("default exclude") {
// val exclude = Exclude()
// it("should exclude all files") {
// paths.foreach(path => {
// assertResult(true)(exclude.isExcluded(path))
// })
// }
// }
describe("directory exact match exclude '/upload/subdir/'") {
val exclude = Exclude("/upload/subdir/")
it("exclude matching directory") {
val matching = Paths.get("/upload/subdir/leaf-file")
assertResult(true)(exclude.isExcluded(matching))
}
it("include non-matching files") {
val nonMatching = Paths.get("/upload/other-file")
assertResult(false)(exclude.isExcluded(nonMatching))
}
}
describe("file partial match 'root'") {
val exclude = Exclude("root")
it("exclude matching file '/upload/root-file") {
val matching = Paths.get("/upload/root-file")
assertResult(true)(exclude.isExcluded(matching))
}
it("include non-matching files 'test-file-for-hash.txt' & '/upload/subdir/leaf-file'") {
val nonMatching1 = Paths.get("/test-file-for-hash.txt")
val nonMatching2 = Paths.get("/upload/subdir/leaf-file")
assertResult(false)(exclude.isExcluded(nonMatching1))
assertResult(false)(exclude.isExcluded(nonMatching2))
}
}
}
describe("isIncluded") {
def invoke(filters: List[Filter]) = {
paths.filter(path => Filter.isIncluded(filters)(path))
}
describe("when there are no filters") {
val filters = List[Filter]()
it("should accept all files") {
val expected = paths
val result = invoke(filters)
assertResult(expected)(result)
}
}
describe("when a single include") {
val filters = List(Include(".txt"))
it("should only include two matching paths") {
val expected = List(path2, path3).map(Paths.get(_))
val result = invoke(filters)
assertResult(expected)(result)
}
}
describe("when a single exclude") {
val filters = List(Exclude("path"))
it("should include only other paths") {
val expected = List(path1, path2, path5, path6).map(Paths.get(_))
val result = invoke(filters)
assertResult(expected)(result)
}
}
describe("when include .txt files, but then exclude everything trumps all") {
val filters = List(Include(".txt"), Exclude(".*"))
it("should include nothing") {
val expected = List()
val result = invoke(filters)
assertResult(expected)(result)
}
}
describe("when exclude everything except .txt files") {
val filters = List(Exclude(".*"), Include(".txt"))
it("should include only the .txt files") {
val expected = List(path2, path3).map(Paths.get(_))
val result = invoke(filters)
assertResult(expected)(result)
}
}
}
}