diff --git a/README.org b/README.org index ff90617..072d5bf 100644 --- a/README.org +++ b/README.org @@ -8,6 +8,47 @@ The normal ~aws s3 sync ...~ command only uses the time stamp of files to decide what files need to be copied. This utility looks at the md5 hash of the file contents. +* How does aws-s3-sync-by-hash do it? +The following is a rough, first-draft sketch of the process in pseudo-Scala. +** constructor +val options = Load command line arguments and AWS security keys. + +** def sync(): Promise[Upload] + +val uploadPromise = createUploadPromise() +if options contains delete then createDeletePromise() +else return uploadPromise + +** def createUploadPromise(): Promise[Upload] + +readdir(options(root)) +loadS3MetaData +filterByHash +uploadFile +callback(file => uploadedFiles += file) + +** def loadS3MetaData: Stream[S3MetaData] + +HEAD(bucket, key) +map (metadata => S3MetaData(localFile, bucket, key, metadata.hash, metadata.lastModified)) + +** def filterByHash(p: S3MetaData => Boolean): Stream[S3MetaData] + +md5File(localFile) +filter(localHash => options.force || localHash != metadataHash) + +** def uploadFile(upload: Upload): IO[Unit] + +S3Upload(bucket, key, localFile) + +** def createDeletePromise(): Promise[Delete] + +S3AllKeys(bucket, key) +filter(remoteKey => localFileExists(remoteKey).negate) + +** def deleteFile(delete: Delete): IO[Unit] + +S3Delete(bucket, key, remoteKey)