Create and use a cache of hashes for local files (#249)
* [domain] Define Hashes in domain package
* [filesystem] Load and parse any .thorp.cache files found
* [filesystem] Use cached file data when available and up-to-date
* [lib] FileScanner refactoring
* [filesystem] scan sub-dirs first to minimise time cache is on heap
* [filesystem] Write new cache data to temp file
* [lib] replace cache file when finished updating
* [filesystem] AppendLines to correct file with new lines
* [domain] decode HashType from String
* [filesystem] Store last modified time as epoch milliseconds
* [filesystem] parse lastmodified as a long
* [filesystem] use all hash values in cache
* [lib] FileScanner rearrange code
* [lib] Create and use a single cache file per source
* [storage-aws] Use ETag hash from cache when available
* [filesystem] Merge file data together correctly
* [filesystem] Handle exceptions thrown by Files.move correctly
* [readme] Add section on caching
* [changelog] updated
* [changelog] add pending dependencies notes
* [lib] Filters should not name methods after their defining object
* [lib] Fix up test
parent ed1f0ec7ee
commit f35ea9795d
15 changed files with 421 additions and 85 deletions
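In outline, the commit reuses a file's stored hashes whenever the file's last-modified time has not advanced past the time recorded in the per-source .thorp.cache file; only stale or unknown files are re-read and re-hashed. A minimal Scala sketch of that rule, simplified from the FileSystem.getHashes implementation in the diff below (the helper name and Option-based shape are illustrative, not part of the commit):

import java.nio.file.Path
import java.time.Instant

// Returns the cached hashes only while the file is no newer than its
// cache entry; None tells the caller to re-read the file and compute
// fresh hashes.
def cachedHashes(path: Path, cached: Option[FileData]): Option[Hashes] =
  cached
    .filterNot { fileData =>
      Instant
        .ofEpochMilli(path.toFile.lastModified())
        .isAfter(fileData.lastModified)
    }
    .map(_.hashes)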
@@ -6,6 +6,17 @@ The format is based on [[https://keepachangelog.com/en/1.0.0/][Keep a Changelog]
 [[https://semver.org/spec/v2.0.0.html][Semantic Versioning]].
 
+* [1.0.0] - ???
+
+** Added
+
+- Create and use a cache of hashes for local files (#249)
+
+** Dependencies
+
+- Revert "[sbt] Rollback ~sbt-ci-release~ to ~1.3.2~ (#231)"
+- Update ~sbt~ to ~1.3.3~ (#238)
+
 * [0.10.0] - 2019-10-08
 
 This is the last ~v0.x~ feature release. The next feature release will be ~v1.x~.
@@ -64,6 +64,10 @@ that can be written to a file.
 Note, that ~include~ and ~exclude~ are cumulative across all
 configuration files.
 
+* Caching
+
+The last modified time of each file is used to decide whether to calculate its hash values. If a file has not been updated, the hash values stored in the ~.thorp.cache~ file in the root of the source are used. Otherwise, the file is read and the new hashes are calculated.
+
 * Behaviour
 
 When considering a local file, the following table governs what should happen:
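Each line in the cache file records one hash for one file as four colon-separated fields: hash type, hash value, last-modified time in epoch milliseconds, and the file's path relative to the source (see PathCache.create and its matching regex later in this diff). An illustrative line, with a made-up hash and timestamp:

MD5:d41d8cd98f00b204e9800998ecf8427e:1570000000000:images/photo.jpg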
@@ -8,7 +8,7 @@ import net.kemitix.thorp.domain.Implicits._
 final case class LocalFile private (
     file: File,
     source: File,
-    hashes: Map[HashType, MD5Hash],
+    hashes: Hashes,
     remoteKey: RemoteKey,
     length: Long
 )
@@ -36,7 +36,7 @@ object RemoteObjects {
 
   def remoteHasHash(
       remoteObjects: RemoteObjects,
-      hashes: Map[HashType, MD5Hash]
+      hashes: Hashes
   ): UIO[Option[(RemoteKey, MD5Hash)]] =
     UIO(remoteObjects.byHash.collectFirst {
       case (hash, key) if (hashes.values.exists(h => h == hash)) => (key, hash)
@@ -0,0 +1,8 @@
+package net.kemitix.thorp
+
+import java.time.Instant
+
+package object domain {
+  type Hashes = Map[HashType, MD5Hash]
+  type LastModified = Instant
+}
@@ -0,0 +1,22 @@
+package net.kemitix.thorp.filesystem
+
+import net.kemitix.thorp.domain.{Hashes, LastModified}
+
+case class FileData(
+    hashes: Hashes,
+    lastModified: LastModified
+) {
+  def +(other: FileData): FileData = {
+    FileData(
+      hashes = this.hashes ++ other.hashes,
+      lastModified = lastModified // discards other.lastModified
+    )
+  }
+}
+
+object FileData {
+  def create(hashes: Hashes, lastModified: LastModified): FileData = FileData(
+    hashes = hashes,
+    lastModified = lastModified
+  )
+}
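A quick usage sketch of the ~+~ merge defined above: combining an MD5 entry and an ETag entry for the same path keeps both hash types while the left-hand timestamp wins (the value names here are placeholders, not from the commit):

// Merging cache entries: hashes accumulate, other.lastModified is dropped.
val md5Entry  = FileData.create(Map(MD5 -> md5Hash), modifiedAt)
val eTagEntry = FileData.create(Map(ETag -> eTagHash), someLaterTime)
val merged    = md5Entry + eTagEntry
// merged.hashes       == Map(MD5 -> md5Hash, ETag -> eTagHash)
// merged.lastModified == modifiedAt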
@@ -1,10 +1,11 @@
 package net.kemitix.thorp.filesystem
 
-import java.io.{File, FileInputStream}
-import java.nio.file.{Files, Path}
+import java.io.{File, FileInputStream, FileWriter}
+import java.nio.file.{Files, Path, StandardCopyOption}
+import java.time.Instant
 import java.util.stream
 
-import net.kemitix.thorp.domain.{RemoteKey, Sources}
+import net.kemitix.thorp.domain.{Hashes, RemoteKey, Sources}
 import zio._
 
 import scala.jdk.CollectionConverters._
@@ -19,12 +20,19 @@ object FileSystem {
     def openManagedFileInputStream(file: File, offset: Long)
       : RIO[FileSystem, ZManaged[Any, Throwable, FileInputStream]]
     def fileLines(file: File): RIO[FileSystem, Seq[String]]
+    def appendLines(lines: Iterable[String], file: File): UIO[Unit]
     def isDirectory(file: File): RIO[FileSystem, Boolean]
     def listFiles(path: Path): UIO[List[File]]
+    def listDirs(path: Path): UIO[List[Path]]
     def length(file: File): ZIO[FileSystem, Nothing, Long]
+    def lastModified(file: File): UIO[Instant]
     def hasLocalFile(sources: Sources,
                      prefix: RemoteKey,
                      remoteKey: RemoteKey): ZIO[FileSystem, Nothing, Boolean]
+    def findCache(
+        directory: Path): ZIO[FileSystem with Hasher, Nothing, PathCache]
+    def getHashes(path: Path, fileData: FileData): ZIO[FileSystem, Any, Hashes]
+    def moveFile(source: Path, target: Path): UIO[Unit]
   }
   trait Live extends FileSystem {
     override val filesystem: Service = new Service {
@@ -59,12 +67,28 @@ object FileSystem {
         Task(file.isDirectory)
 
       override def listFiles(path: Path): UIO[List[File]] =
-        Task(List.from(path.toFile.listFiles()))
-          .catchAll(_ => UIO.succeed(List.empty[File]))
+        Task {
+          List
+            .from(path.toFile.listFiles())
+            .filterNot(_.isDirectory)
+            .filterNot(_.getName.contentEquals(PathCache.fileName))
+            .filterNot(_.getName.contentEquals(PathCache.tempFileName))
+        }.catchAll(_ => UIO.succeed(List.empty[File]))
 
+      override def listDirs(path: Path): UIO[List[Path]] =
+        Task(
+          List
+            .from(path.toFile.listFiles())
+            .filter(_.isDirectory)
+            .map(_.toPath))
+          .catchAll(_ => UIO.succeed(List.empty[Path]))
+
       override def length(file: File): ZIO[FileSystem, Nothing, Long] =
         UIO(file.length)
 
+      override def lastModified(file: File): UIO[Instant] =
+        UIO(Instant.ofEpochMilli(file.lastModified()))
+
       override def hasLocalFile(
           sources: Sources,
           prefix: RemoteKey,
@@ -77,6 +101,40 @@ object FileSystem {
             .map(_ || accExists)
         }
       }
+
+      override def findCache(
+          directory: Path): ZIO[FileSystem with Hasher, Nothing, PathCache] =
+        for {
+          cacheFile <- UIO(directory.resolve(PathCache.fileName).toFile)
+          lines <- fileLines(cacheFile).catchAll(_ => UIO(List.empty))
+          cache <- PathCache.fromLines(lines)
+        } yield cache
+
+      override def getHashes(
+          path: Path,
+          fileData: FileData): ZIO[FileSystem, Any, Hashes] = {
+        val lastModified = Instant.ofEpochMilli(path.toFile.lastModified())
+        if (lastModified.isAfter(fileData.lastModified)) {
+          ZIO.fail("fileData is out-of-date")
+        } else {
+          ZIO.succeed(fileData.hashes)
+        }
+      }
+
+      override def appendLines(lines: Iterable[String], file: File): UIO[Unit] =
+        UIO.bracket(UIO(new FileWriter(file, true)))(fw => UIO(fw.close()))(
+          fw =>
+            UIO {
+              lines.map(line => fw.append(line + System.lineSeparator()))
+          })
+
+      override def moveFile(source: Path, target: Path): UIO[Unit] =
+        IO {
+          if (source.toFile.exists()) {
+            Files.move(source, target, StandardCopyOption.ATOMIC_MOVE)
+          }
+          ()
+        }.catchAll(_ => UIO.unit)
     }
   }
   object Live extends Live
@@ -86,9 +144,13 @@ object FileSystem {
     val fileLinesResult: Task[List[String]]
     val isDirResult: Task[Boolean]
     val listFilesResult: UIO[List[File]]
+    val listDirsResult: UIO[List[Path]]
     val lengthResult: UIO[Long]
+    val lastModifiedResult: UIO[Instant]
     val managedFileInputStream: Task[ZManaged[Any, Throwable, FileInputStream]]
     val hasLocalFileResult: UIO[Boolean]
+    val pathCacheResult: UIO[PathCache]
+    val matchesResult: IO[Any, Hashes]
 
     override val filesystem: Service = new Service {
 
@@ -108,14 +170,33 @@ object FileSystem {
       override def listFiles(path: Path): UIO[List[File]] =
         listFilesResult
 
+      override def listDirs(path: Path): UIO[List[Path]] =
+        listDirsResult
+
       override def length(file: File): UIO[Long] =
         lengthResult
 
+      override def lastModified(file: File): UIO[Instant] =
+        lastModifiedResult
+
       override def hasLocalFile(
           sources: Sources,
           prefix: RemoteKey,
          remoteKey: RemoteKey): ZIO[FileSystem, Nothing, Boolean] =
         hasLocalFileResult
 
+      override def findCache(directory: Path): UIO[PathCache] =
+        pathCacheResult
+
+      override def getHashes(path: Path,
+                             fileData: FileData): ZIO[FileSystem, Any, Hashes] =
+        matchesResult
+
+      override def appendLines(lines: Iterable[String], file: File): UIO[Unit] =
+        UIO.unit
+
+      override def moveFile(source: Path, target: Path): UIO[Unit] =
+        UIO.unit
     }
   }
 
@@ -136,9 +217,18 @@ object FileSystem {
   final def isDirectory(file: File): RIO[FileSystem, Boolean] =
     ZIO.accessM(_.filesystem.isDirectory(file))
 
+  /**
+    * Lists only files within the Path.
+    */
   final def listFiles(path: Path): ZIO[FileSystem, Nothing, List[File]] =
     ZIO.accessM(_.filesystem.listFiles(path))
 
+  /**
+    * Lists only sub-directories within the Path.
+    */
+  final def listDirs(path: Path): ZIO[FileSystem, Nothing, List[Path]] =
+    ZIO.accessM(_.filesystem.listDirs(path))
+
   final def length(file: File): ZIO[FileSystem, Nothing, Long] =
     ZIO.accessM(_.filesystem.length(file))
 
@@ -147,4 +237,26 @@ object FileSystem {
       prefix: RemoteKey,
       remoteKey: RemoteKey): ZIO[FileSystem, Nothing, Boolean] =
     ZIO.accessM(_.filesystem.hasLocalFile(sources, prefix, remoteKey))
+
+  final def findCache(
+      directory: Path): ZIO[FileSystem with Hasher, Nothing, PathCache] =
+    ZIO.accessM(_.filesystem.findCache(directory))
+
+  final def getHashes(path: Path,
+                      fileData: FileData): ZIO[FileSystem, Any, Hashes] =
+    ZIO.accessM(_.filesystem.getHashes(path, fileData))
+
+  final def lastModified(file: File): ZIO[FileSystem, Nothing, Instant] =
+    ZIO.accessM(_.filesystem.lastModified(file))
+
+  final def appendLines(lines: Iterable[String],
+                        file: File): ZIO[FileSystem, Nothing, Unit] =
+    ZIO.accessM(_.filesystem.appendLines(lines, file))
+
+  final def moveFile(
+      source: Path,
+      target: Path
+  ): ZIO[FileSystem, Nothing, Unit] =
+    ZIO.accessM(_.filesystem.moveFile(source, target))
+
 }
@@ -4,7 +4,7 @@ import java.nio.file.Path
 import java.util.concurrent.atomic.AtomicReference
 
 import net.kemitix.thorp.domain.HashType.MD5
-import net.kemitix.thorp.domain.{HashType, MD5Hash}
+import net.kemitix.thorp.domain.{HashType, Hashes}
 import zio.{RIO, ZIO}
 
 /**
@@ -15,27 +15,33 @@ trait Hasher {
 }
 object Hasher {
   trait Service {
+    def typeFrom(str: String): ZIO[Hasher, IllegalArgumentException, HashType]
+
     def hashObject(
-        path: Path): RIO[Hasher with FileSystem, Map[HashType, MD5Hash]]
-    def hashObjectChunk(
-        path: Path,
-        chunkNumber: Long,
-        chunkSize: Long): RIO[Hasher with FileSystem, Map[HashType, MD5Hash]]
+        path: Path,
+        cachedFileData: Option[FileData]): RIO[Hasher with FileSystem, Hashes]
+    def hashObjectChunk(path: Path,
+                        chunkNumber: Long,
+                        chunkSize: Long): RIO[Hasher with FileSystem, Hashes]
     def hex(in: Array[Byte]): RIO[Hasher, String]
     def digest(in: String): RIO[Hasher, Array[Byte]]
   }
   trait Live extends Hasher {
     val hasher: Service = new Service {
       override def hashObject(
-          path: Path): RIO[FileSystem, Map[HashType, MD5Hash]] =
-        for {
+          path: Path,
+          cachedFileData: Option[FileData]): RIO[FileSystem, Hashes] =
+        ZIO
+          .fromOption(cachedFileData)
+          .flatMap(fileData => FileSystem.getHashes(path, fileData))
+          .orElse(for {
             md5 <- MD5HashGenerator.md5File(path)
-        } yield Map(MD5 -> md5)
+          } yield Map(MD5 -> md5))
 
-      override def hashObjectChunk(path: Path,
-                                   chunkNumber: Long,
-                                   chunkSize: Long)
-        : RIO[Hasher with FileSystem, Map[HashType, MD5Hash]] =
+      override def hashObjectChunk(
+          path: Path,
+          chunkNumber: Long,
+          chunkSize: Long): RIO[Hasher with FileSystem, Hashes] =
         for {
           md5 <- MD5HashGenerator.md5FileChunk(path,
                                                chunkNumber * chunkSize,
@@ -47,25 +53,33 @@ object Hasher {
 
       override def digest(in: String): RIO[Hasher, Array[Byte]] =
         ZIO(MD5HashGenerator.digest(in))
+
+      override def typeFrom(
+          str: String): ZIO[Hasher, IllegalArgumentException, HashType] =
+        if (str.contentEquals("MD5")) {
+          ZIO.succeed(MD5)
+        } else {
+          ZIO.fail(
+            new IllegalArgumentException("Unknown Hash Type: %s".format(str)))
+        }
     }
   }
   object Live extends Live
 
   trait Test extends Hasher {
-    val hashes: AtomicReference[Map[Path, Map[HashType, MD5Hash]]] =
+    val hashes: AtomicReference[Map[Path, Hashes]] =
       new AtomicReference(Map.empty)
-    val hashChunks
-      : AtomicReference[Map[Path, Map[Long, Map[HashType, MD5Hash]]]] =
+    val hashChunks: AtomicReference[Map[Path, Map[Long, Hashes]]] =
       new AtomicReference(Map.empty)
     val hasher: Service = new Service {
-      override def hashObject(
-          path: Path): RIO[Hasher with FileSystem, Map[HashType, MD5Hash]] =
+      override def hashObject(path: Path, cachedFileData: Option[FileData])
+        : RIO[Hasher with FileSystem, Hashes] =
         ZIO(hashes.get()(path))
 
-      override def hashObjectChunk(path: Path,
-                                   chunkNumber: Long,
-                                   chunkSize: Long)
-        : RIO[Hasher with FileSystem, Map[HashType, MD5Hash]] =
+      override def hashObjectChunk(
+          path: Path,
+          chunkNumber: Long,
+          chunkSize: Long): RIO[Hasher with FileSystem, Hashes] =
         ZIO(hashChunks.get()(path)(chunkNumber))
 
       override def hex(in: Array[Byte]): RIO[Hasher, String] =
@@ -73,18 +87,23 @@ object Hasher {
 
       override def digest(in: String): RIO[Hasher, Array[Byte]] =
         ZIO(MD5HashGenerator.digest(in))
+
+      override def typeFrom(
+          str: String): ZIO[Hasher, IllegalArgumentException, HashType] =
+        Live.hasher.typeFrom(str)
     }
   }
   object Test extends Test
 
   final def hashObject(
-      path: Path): RIO[Hasher with FileSystem, Map[HashType, MD5Hash]] =
-    ZIO.accessM(_.hasher hashObject path)
+      path: Path,
+      cachedFileData: Option[FileData]): RIO[Hasher with FileSystem, Hashes] =
+    ZIO.accessM(_.hasher.hashObject(path, cachedFileData))
 
   final def hashObjectChunk(
       path: Path,
       chunkNumber: Long,
-      chunkSize: Long): RIO[Hasher with FileSystem, Map[HashType, MD5Hash]] =
+      chunkSize: Long): RIO[Hasher with FileSystem, Hashes] =
     ZIO.accessM(_.hasher hashObjectChunk (path, chunkNumber, chunkSize))
 
   final def hex(in: Array[Byte]): RIO[Hasher, String] =
@@ -92,4 +111,9 @@ object Hasher {
 
   final def digest(in: String): RIO[Hasher, Array[Byte]] =
     ZIO.accessM(_.hasher digest in)
+
+  final def typeFrom(
+      str: String): ZIO[Hasher, IllegalArgumentException, HashType] =
+    ZIO.accessM(_.hasher.typeFrom(str))
+
 }
@@ -0,0 +1,74 @@
+package net.kemitix.thorp.filesystem
+
+import java.nio.file.{Path, Paths}
+import java.time.Instant
+import java.util.regex.Pattern
+
+import net.kemitix.thorp.domain.{HashType, MD5Hash}
+import zio.{UIO, ZIO}
+
+/**
+  * Meta data for files in the current source, as of the last time Thorp processed this directory.
+  *
+  * <p>N.B. Does not include sub-directories.</p>
+  */
+final case class PathCache(
+    data: PathCache.Data
+) {
+  def get(path: Path): Option[FileData] = data.get(path)
+}
+
+object PathCache {
+  type Data = Map[Path, FileData]
+  val fileName = ".thorp.cache"
+  val tempFileName = ".thorp.cache.tmp"
+
+  def create(path: Path, fileData: FileData): UIO[Iterable[String]] =
+    UIO {
+      fileData.hashes.keys.map(hashType => {
+        val hash = fileData.hashes(hashType)
+        val modified = fileData.lastModified
+        String.join(":",
+                    hashType.toString,
+                    hash.in,
+                    modified.toEpochMilli.toString,
+                    path.toString)
+      })
+    }
+
+  private val pattern =
+    "^(?<hashtype>.+):(?<hash>.+):(?<modified>\\d+):(?<filename>.+)$"
+  private val format = Pattern.compile(pattern)
+  def fromLines(lines: Seq[String]): ZIO[Hasher, Nothing, PathCache] =
+    ZIO
+      .foreach(
+        lines
+          .map(format.matcher(_))
+          .filter(_.matches())) { matcher =>
+        for {
+          hashType <- Hasher.typeFrom(matcher.group("hashtype"))
+        } yield
+          (Paths.get(matcher.group("filename")) -> FileData
+            .create(
+              Map[HashType, MD5Hash](
+                hashType -> MD5Hash(matcher.group("hash"))),
+              Instant.ofEpochMilli(matcher.group("modified").toLong)
+            ))
+      }
+      .catchAll({ _: IllegalArgumentException =>
+        UIO(List.empty)
+      })
+      .map(list => mergeFileData(list))
+      .map(map => PathCache(map))
+
+  private def mergeFileData(
+      list: List[(Path, FileData)]
+  ): Data = {
+    list.foldLeft(Map.empty[Path, FileData]) { (acc, pair) =>
+      val (fileName, fileData) = pair
+      acc.updatedWith(fileName)(
+        _.map(fd => fd + fileData)
+          .orElse(Some(fileData)))
+    }
+  }
+}
@@ -0,0 +1,5 @@
+package net.kemitix.thorp
+
+package object filesystem {
+  type FileName = String
+}
@@ -6,15 +6,8 @@
 import net.kemitix.eip.zio.MessageChannel.{EChannel, ESender}
 import net.kemitix.eip.zio.{Message, MessageChannel}
 import net.kemitix.thorp.config.Config
-import net.kemitix.thorp.domain.{
-  Filter,
-  HashType,
-  LocalFile,
-  MD5Hash,
-  RemoteKey,
-  Sources
-}
-import net.kemitix.thorp.filesystem.{FileSystem, Hasher}
+import net.kemitix.thorp.domain._
+import net.kemitix.thorp.filesystem._
 import zio.clock.Clock
 import zio.{RIO, UIO, ZIO}
@@ -25,12 +18,21 @@ trait FileScanner {
 object FileScanner {
 
   type RemoteHashes = Map[MD5Hash, RemoteKey]
-  type Hashes = Map[HashType, MD5Hash]
   type ScannedFile = LocalFile
-  type FileSender = ESender[Clock with Hasher with FileSystem with Config,
+  type FileSender =
+    ESender[Clock with Hasher with FileSystem with Config with FileScanner,
             Throwable,
             ScannedFile]
   type ScannerChannel = EChannel[Any, Throwable, ScannedFile]
+  type CacheData = (Path, FileData)
+  type CacheChannel = EChannel[Any, Throwable, CacheData]
+  type CacheSender =
+    ESender[Clock with FileSystem with Hasher with FileScanner with Config,
+            Throwable,
+            CacheData]
+
+  final def scanSources: RIO[FileScanner, FileSender] =
+    ZIO.accessM(_.fileScanner.scanSources)
 
   trait Service {
     def scanSources: RIO[FileScanner, FileSender]
@@ -40,49 +42,104 @@ object FileScanner {
     val fileScanner: Service = new Service {
 
       override def scanSources: RIO[FileScanner, FileSender] =
-        RIO { channel =>
+        RIO { fileChannel =>
           (for {
             sources <- Config.sources
-            _ <- ZIO.foreach(sources.paths)(scanPath(channel)(_))
-          } yield ()) <* MessageChannel.endChannel(channel)
+            _ <- ZIO.foreach(sources.paths) { sourcePath =>
+              for {
+                cacheSender <- scanSource(fileChannel)(sourcePath)
+                cacheReceiver <- cacheReceiver(sourcePath)
+                _ <- MessageChannel
+                  .pointToPoint(cacheSender)(cacheReceiver)
+                  .runDrain
+                _ <- FileSystem.moveFile(
+                  sourcePath.resolve(PathCache.tempFileName),
+                  sourcePath.resolve(PathCache.fileName))
+              } yield ()
+            }
+          } yield ()) <* MessageChannel.endChannel(fileChannel)
         }
 
-      private def scanPath(channel: ScannerChannel)(path: Path)
-        : ZIO[Clock with Config with Hasher with FileSystem, Throwable, Unit] =
+      private def scanSource(fileChannel: ScannerChannel)(
+          sourcePath: Path): RIO[FileScanner, CacheSender] =
+        RIO { cacheChannel =>
+          (for {
+            cache <- FileSystem.findCache(sourcePath)
+            _ <- scanPath(fileChannel, cacheChannel)(sourcePath, cache)
+          } yield ()) <* MessageChannel.endChannel(cacheChannel)
+        }
+
+      private def scanPath(
+          fileChannel: ScannerChannel,
+          cacheChannel: CacheChannel)(path: Path, cache: PathCache)
+        : ZIO[Clock with FileSystem with Hasher with FileScanner with Config,
+              Throwable,
+              Unit] =
         for {
-          filters <- Config.filters
+          dirs <- FileSystem.listDirs(path)
+          _ <- ZIO.foreach(dirs)(scanPath(fileChannel, cacheChannel)(_, cache))
           files <- FileSystem.listFiles(path)
-          _ <- ZIO.foreach(files)(handleFile(channel, filters))
+          _ <- handleFiles(fileChannel, cacheChannel, cache, files)
         } yield ()
 
+      private def handleFiles(
+          fileChannel: ScannerChannel,
+          cacheChannel: CacheChannel,
+          pathCache: PathCache,
+          files: List[File]
+      ) =
+        ZIO.foreach(files) {
+          handleFile(fileChannel, cacheChannel, pathCache)
+        }
+
       private def handleFile(
-          channel: ScannerChannel,
-          filters: List[Filter]
-      )(file: File) =
+          fileChannel: ScannerChannel,
+          cacheChannel: CacheChannel,
+          cache: PathCache
+      )(file: File)
+        : ZIO[Clock with FileSystem with Hasher with Config, Throwable, Unit] =
         for {
-          isDir <- FileSystem.isDirectory(file)
-          isIncluded <- UIO(Filters.isIncluded(file.toPath)(filters))
-          _ <- ZIO.when(isIncluded && isDir)(scanPath(channel)(file.toPath))
-          _ <- ZIO.when(isIncluded && !isDir)(sendHashedFile(channel)(file))
+          isIncluded <- Filters.isIncluded(file)
+          _ <- ZIO.when(isIncluded) {
+            sendHashedFile(fileChannel, cacheChannel)(file, cache)
+          }
         } yield ()
 
-      private def sendHashedFile(channel: ScannerChannel)(file: File) =
+      private def sendHashedFile(
+          fileChannel: ScannerChannel,
+          cacheChannel: CacheChannel
+      )(file: File, pathCache: PathCache) =
         for {
           sources <- Config.sources
           source <- Sources.forPath(file.toPath)(sources)
           prefix <- Config.prefix
-          hashes <- Hasher.hashObject(file.toPath)
+          path = source.relativize(file.toPath)
+          hashes <- Hasher.hashObject(file.toPath, pathCache.get(path))
          remoteKey <- RemoteKey.from(source, prefix, file)
           size <- FileSystem.length(file)
-          localFile <- ZIO(
+          fileMsg <- Message.create(
             LocalFile(file, source.toFile, hashes, remoteKey, size))
-          hashedFile <- Message.create(localFile)
-          _ <- MessageChannel.send(channel)(hashedFile)
+          _ <- MessageChannel.send(fileChannel)(fileMsg)
+          modified <- FileSystem.lastModified(file)
+          cacheMsg <- Message.create(
+            (path -> FileData.create(hashes, modified)))
+          _ <- MessageChannel.send(cacheChannel)(cacheMsg)
         } yield ()
+
+      def cacheReceiver(sourcePath: Path)
+        : UIO[MessageChannel.UReceiver[FileSystem, CacheData]] = {
+        val tempFile = sourcePath.resolve(PathCache.tempFileName).toFile
+        UIO { message =>
+          val (path, fileData) = message.body
+          for {
+            line <- PathCache.create(path, fileData)
+            _ <- FileSystem.appendLines(line, tempFile)
+          } yield ()
+        }
+      }
     }
 
   }
 
   object Live extends Live
-  final def scanSources: RIO[FileScanner, FileSender] =
-    ZIO.accessM(_.fileScanner.scanSources)
 }
@@ -1,12 +1,20 @@
 package net.kemitix.thorp.lib
 
+import java.io.File
 import java.nio.file.Path
 
+import net.kemitix.thorp.config.Config
 import net.kemitix.thorp.domain.Filter
 import net.kemitix.thorp.domain.Filter.{Exclude, Include}
+import zio.ZIO
 
 object Filters {
 
+  def isIncluded(file: File): ZIO[Config, Nothing, Boolean] =
+    for {
+      filters <- Config.filters
+    } yield isIncluded(file.toPath)(filters)
+
   def isIncluded(p: Path)(filters: List[Filter]): Boolean = {
     sealed trait State
     final case class Unknown() extends State
@@ -11,7 +11,6 @@ import net.kemitix.thorp.domain.RemoteObjects.{
 }
 import net.kemitix.thorp.domain._
 import net.kemitix.thorp.filesystem.{FileSystem, Hasher}
-import net.kemitix.thorp.lib.FileScanner.Hashes
 import net.kemitix.thorp.storage.Storage
 import net.kemitix.thorp.uishell.UIEvent
 import zio._
@@ -2,4 +2,4 @@ package net.kemitix.thorp.storage.aws
 
 import net.kemitix.thorp.domain.HashType
 
-object ETag extends HashType
+case object ETag extends HashType
@@ -2,12 +2,12 @@ package net.kemitix.thorp.storage.aws.hasher
 
 import java.nio.file.Path
 
-import net.kemitix.thorp.domain.{HashType, MD5Hash}
+import net.kemitix.thorp.domain.{HashType, Hashes, MD5Hash}
 import net.kemitix.thorp.filesystem.Hasher.Live.{hasher => CoreHasher}
 import net.kemitix.thorp.filesystem.Hasher.Service
-import net.kemitix.thorp.filesystem.{FileSystem, Hasher}
+import net.kemitix.thorp.filesystem.{FileData, FileSystem, Hasher}
 import net.kemitix.thorp.storage.aws.ETag
-import zio.RIO
+import zio.{RIO, ZIO}
 
 object S3Hasher {
 
@@ -20,17 +20,20 @@ object S3Hasher {
         * @param path the local path to scan
         * @return a set of hash values
         */
-      override def hashObject(
-          path: Path): RIO[Hasher with FileSystem, Map[HashType, MD5Hash]] =
-        for {
-          base <- CoreHasher.hashObject(path)
+      override def hashObject(path: Path, cachedFileData: Option[FileData])
+        : RIO[Hasher with FileSystem, Hashes] =
+        ZIO
+          .fromOption(cachedFileData)
+          .flatMap(fileData => FileSystem.getHashes(path, fileData))
+          .orElse(for {
+            base <- CoreHasher.hashObject(path, cachedFileData)
             etag <- ETagGenerator.eTag(path).map(MD5Hash(_))
-        } yield base + (ETag -> etag)
+          } yield base + (ETag -> etag))
 
-      override def hashObjectChunk(path: Path,
-                                   chunkNumber: Long,
-                                   chunkSize: Long)
-        : RIO[Hasher with FileSystem, Map[HashType, MD5Hash]] =
+      override def hashObjectChunk(
+          path: Path,
+          chunkNumber: Long,
+          chunkSize: Long): RIO[Hasher with FileSystem, Hashes] =
         CoreHasher.hashObjectChunk(path, chunkNumber, chunkSize)
 
       override def hex(in: Array[Byte]): RIO[Hasher, String] =
@@ -38,6 +41,15 @@ object S3Hasher {
 
       override def digest(in: String): RIO[Hasher, Array[Byte]] =
         CoreHasher.digest(in)
+
+      override def typeFrom(
+          str: String): ZIO[Hasher, IllegalArgumentException, HashType] =
+        if (str.contentEquals("ETag")) {
+          RIO.succeed(ETag)
+        } else {
+          CoreHasher.typeFrom(str)
+        }
+
     }
 
 }