From 6b450a36df695185fef28c97fe10d5cf61919475 Mon Sep 17 00:00:00 2001 From: Paul Campbell Date: Sat, 26 Mar 2022 18:29:39 +0000 Subject: [PATCH] Enable specifying the value for dcterms:modified value (#27) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * pom: set version to 1.2.0 * pom: specify dependency versions as properties * pom: add assertj-core as test dependency * Add test for setting modified metadata value * Revert "pom: set version to 1.2.0" No change was needed warranting a ‘minor’ version change. This reverts commit 5051fcf6bac670fffbf5fdfbb769818fee8cf637. * Override default dcterms:modified value if provided * pom: version set to 1.2.0 --- pom.xml | 26 +- .../epub/creator/impl/OpfCreatorDefault.java | 480 +++++++++--------- .../epub/creator/EpubCreatorTest.java | 135 +++-- 3 files changed, 351 insertions(+), 290 deletions(-) diff --git a/pom.xml b/pom.xml index a79a7d8..b05060a 100644 --- a/pom.xml +++ b/pom.xml @@ -9,9 +9,8 @@ - net.kemitix epub-creator - 1.1.0 + 1.2.0 scm:git:git@github.com:kemitix/epub-creator.git @@ -27,38 +26,49 @@ 2.22 2.8.0 + 3.22.0 + 1.18.20 + 3.2.2 + 2.10.0 + 2.24 + 4.13.2 junit junit - 4.13.2 + ${junit.version} test net.sourceforge.htmlcleaner htmlcleaner - 2.24 + ${htmlcleaner.version} commons-io commons-io - 2.10.0 + ${commons-io.version} commons-collections commons-collections - 3.2.2 + ${commons-collections.version} org.projectlombok lombok - 1.18.20 + ${lombok.version} true - + + org.assertj + assertj-core + ${assertj.version} + test + diff --git a/src/main/java/coza/opencollab/epub/creator/impl/OpfCreatorDefault.java b/src/main/java/coza/opencollab/epub/creator/impl/OpfCreatorDefault.java index fc49608..9697aaf 100644 --- a/src/main/java/coza/opencollab/epub/creator/impl/OpfCreatorDefault.java +++ b/src/main/java/coza/opencollab/epub/creator/impl/OpfCreatorDefault.java @@ -1,234 +1,246 @@ -/* Copyright 2014 OpenCollab. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package coza.opencollab.epub.creator.impl; - -import coza.opencollab.epub.creator.EpubConstants; -import coza.opencollab.epub.creator.api.MetadataItem; -import coza.opencollab.epub.creator.api.OpfCreator; -import coza.opencollab.epub.creator.model.Content; -import coza.opencollab.epub.creator.model.EpubBook; -import org.htmlcleaner.CleanerProperties; -import org.htmlcleaner.ContentNode; -import org.htmlcleaner.HtmlCleaner; -import org.htmlcleaner.PrettyXmlSerializer; -import org.htmlcleaner.Serializer; -import org.htmlcleaner.TagNode; - -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; - -/** - * Default implementation of the OpfCreator. This follows EPUB3 standards to - * create the OPF file content. 
- * - * @author OpenCollab - */ -public class OpfCreatorDefault implements OpfCreator { - - /** - * The template XML used to create the OPF file. This is settable if a - * different template needs to be used. - */ - private String opfXML = EpubConstants.OPF_XML; - - /** - * HtmlCleaner used to clean the XHTML document - */ - private final HtmlCleaner cleaner; - - /** - * XmlSerializer used to format to XML String output - */ - private final Serializer htmlSetdown; - - private final List metadataItems = new ArrayList<>(); - - public OpfCreatorDefault() { - cleaner = new HtmlCleaner(); - CleanerProperties htmlProperties = cleaner.getProperties(); - htmlProperties.setOmitHtmlEnvelope(true); - htmlProperties.setAdvancedXmlEscape(false); - htmlProperties.setUseEmptyElementTags(true); - htmlSetdown = new PrettyXmlSerializer(htmlProperties); - } - - @Override - public void addMetadata(MetadataItem metadataItem) { - this.metadataItems.add(metadataItem); - } - - /** - * {@inheritDoc} - */ - @Override - public String createOpfString(EpubBook book) { - TagNode tagNode = cleaner.clean(opfXML); - addMetaDataTags(tagNode, book); - addManifestTags(tagNode, book); - addSpineTags(tagNode, book); - addCustomMetadata(tagNode, book); - return htmlSetdown.getAsString(tagNode); - } - - private void addCustomMetadata(TagNode tagNode, EpubBook book) { - TagNode metaNode = tagNode.findElementByName("metadata", true); - metadataItems.forEach(item -> { - TagNode node = new TagNode(item.getName()); - if (item.hasId()) { - node.addAttribute("id", item.getId()); - } - if (item.hasProperty()) { - node.addAttribute("property", item.getProperty()); - } - if (item.hasRefines()) { - node.addAttribute("refines", item.getRefines()); - } - if (item.hasValue()) { - node.addChild(new ContentNode(item.getValue())); - } - metaNode.addChild(node); - }); - } - - /** - * Add the required meta data - * - * @param tagNode the HTML tagNode of the OPF template - * @param book the EpubBook - */ - private void 
addMetaDataTags(TagNode tagNode, EpubBook book) { - TagNode metaNode = tagNode.findElementByName("metadata", true); - addNodeData(metaNode, "dc:identifier", book.getId()); - addNodeData(metaNode, "dc:title", book.getTitle()); - addNodeData(metaNode, "dc:language", book.getLanguage()); - addNodeData(metaNode, "meta", new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss'Z'").format(new Date())); - if (book.getAuthor() != null) { - TagNode creatorNode = new TagNode("dc:creator"); - creatorNode.addChild(new ContentNode(book.getAuthor())); - metaNode.addChild(creatorNode); - } - } - - /** - * Adds a item tag to the manifest for each Content object. - * - * The manifest contains all Content that will be added to the EPUB as files - * - * @param tagNode the HTML tagNode of the OPF template - * @param book the EpubBook - */ - private void addManifestTags(TagNode tagNode, EpubBook book) { - TagNode manifestNode = tagNode.findElementByName("manifest", true); - for (Content content : book.getContents()) { - manifestNode.addChild(buildItemNode(content)); - } - } - - /** - * Builds an item tag from the Content object - * - * @param content - * @return - */ - private TagNode buildItemNode(Content content) { - TagNode itemNode = new TagNode("item"); - itemNode.addAttribute("href", content.getHref()); - itemNode.addAttribute("id", content.getId()); - itemNode.addAttribute("media-type", content.getMediaType()); - if (content.getProperties() != null) { - itemNode.addAttribute("properties", content.getProperties()); - } - if (content.hasFallBack()) { - itemNode.addAttribute("fallback", content.getFallBack().getId()); - } - return itemNode; - } - - /** - * Adds item ref tags for all Content objects that must be added to the - * spine. 
- * - * The spine contains all the resources that will be shown when reading the - * book from start to end - * - * @param tagNode the HTML tagNode of the OPF template - * @param book the EpubBook - */ - private void addSpineTags(TagNode tagNode, EpubBook book) { - TagNode spineNode = tagNode.findElementByName("spine", true); - for (Content content : book.getContents()) { - if (content.isSpine()) { - spineNode.addChild(buildItemrefNode(content)); - } - } - } - - /** - * Builds an item ref tag from the Content object - * - * @param content - * @return - */ - private TagNode buildItemrefNode(Content content) { - TagNode itemNode = new TagNode("itemref"); - itemNode.addAttribute("idref", content.getId()); - if (!content.isLinear()) { - itemNode.addAttribute("linear", "no"); - } - return itemNode; - } - - /** - * Adds a ContentNode (value) with to a child element of the TagNode - * - * {value} - * - * @param tagNode - * @param elementName - * @param value - */ - private void addNodeData(TagNode tagNode, String elementName, String value) { - TagNode editNode = tagNode.findElementByName(elementName, true); - editNode.addChild(new ContentNode(value)); - } - - /** - * The base XML used for the OPF file. - * - * @return the OPF XML text - */ - public String getOpfXML() { - return opfXML; - } - - /** - * The base XML used for the OPF file. This is optional as there is a EPUB3 - * standard default but it can be overridden. - * - * @param opfXML the OPF XML to set - */ - public void setOpfXML(String opfXML) { - this.opfXML = opfXML; - } - -} +/* Copyright 2014 OpenCollab. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package coza.opencollab.epub.creator.impl; + +import coza.opencollab.epub.creator.EpubConstants; +import coza.opencollab.epub.creator.api.MetadataItem; +import coza.opencollab.epub.creator.api.OpfCreator; +import coza.opencollab.epub.creator.model.Content; +import coza.opencollab.epub.creator.model.EpubBook; +import org.htmlcleaner.CleanerProperties; +import org.htmlcleaner.ContentNode; +import org.htmlcleaner.HtmlCleaner; +import org.htmlcleaner.PrettyXmlSerializer; +import org.htmlcleaner.Serializer; +import org.htmlcleaner.TagNode; + +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Optional; + +/** + * Default implementation of the OpfCreator. This follows EPUB3 standards to + * create the OPF file content. 
+ * + * @author OpenCollab + */ +public class OpfCreatorDefault implements OpfCreator { + + /** + * The template XML used to create the OPF file. This is settable if a + * different template needs to be used. + */ + private String opfXML = EpubConstants.OPF_XML; + + /** + * HtmlCleaner used to clean the XHTML document + */ + private final HtmlCleaner cleaner; + + /** + * XmlSerializer used to format to XML String output + */ + private final Serializer htmlSetdown; + + private final List metadataItems = new ArrayList<>(); + + public OpfCreatorDefault() { + cleaner = new HtmlCleaner(); + CleanerProperties htmlProperties = cleaner.getProperties(); + htmlProperties.setOmitHtmlEnvelope(true); + htmlProperties.setAdvancedXmlEscape(false); + htmlProperties.setUseEmptyElementTags(true); + htmlSetdown = new PrettyXmlSerializer(htmlProperties); + } + + @Override + public void addMetadata(MetadataItem metadataItem) { + this.metadataItems.add(metadataItem); + } + + /** + * {@inheritDoc} + */ + @Override + public String createOpfString(EpubBook book) { + TagNode tagNode = cleaner.clean(opfXML); + addMetaDataTags(tagNode, book); + addManifestTags(tagNode, book); + addSpineTags(tagNode, book); + addCustomMetadata(tagNode, book); + return htmlSetdown.getAsString(tagNode); + } + + private void addCustomMetadata(TagNode tagNode, EpubBook book) { + TagNode metaNode = tagNode.findElementByName("metadata", true); + metadataItems.forEach(item -> { + TagNode node = new TagNode(item.getName()); + if (item.hasId()) { + node.addAttribute("id", item.getId()); + } + if (item.hasProperty()) { + node.addAttribute("property", item.getProperty()); + } + if (item.hasRefines()) { + node.addAttribute("refines", item.getRefines()); + } + if (item.hasValue()) { + node.addChild(new ContentNode(item.getValue())); + } + metaNode.addChild(node); + }); + } + + /** + * Add the required meta data + * + * @param tagNode the HTML tagNode of the OPF template + * @param book the EpubBook + */ + private void 
addMetaDataTags(TagNode tagNode, EpubBook book) { + TagNode metaNode = tagNode.findElementByName("metadata", true); + addNodeData(metaNode, "dc:identifier", book.getId()); + addNodeData(metaNode, "dc:title", book.getTitle()); + addNodeData(metaNode, "dc:language", book.getLanguage()); + Optional customModifiedValue = metadataItems.stream() + .filter(MetadataItem::hasValue) + .filter(MetadataItem::hasProperty) + .filter(item -> item.getProperty().equals("dcterms:modified")) + .findFirst(); + if (customModifiedValue.isPresent()) { + MetadataItem item = customModifiedValue.get(); + addNodeData(metaNode, "meta", item.getValue()); + metadataItems.remove(item); + } else { + addNodeData(metaNode, "meta", new SimpleDateFormat("yyyy-MM-dd'T'hh:mm:ss'Z'").format(new Date())); + } + if (book.getAuthor() != null) { + TagNode creatorNode = new TagNode("dc:creator"); + creatorNode.addChild(new ContentNode(book.getAuthor())); + metaNode.addChild(creatorNode); + } + } + + /** + * Adds a item tag to the manifest for each Content object. 
+ * + * The manifest contains all Content that will be added to the EPUB as files + * + * @param tagNode the HTML tagNode of the OPF template + * @param book the EpubBook + */ + private void addManifestTags(TagNode tagNode, EpubBook book) { + TagNode manifestNode = tagNode.findElementByName("manifest", true); + for (Content content : book.getContents()) { + manifestNode.addChild(buildItemNode(content)); + } + } + + /** + * Builds an item tag from the Content object + * + * @param content + * @return + */ + private TagNode buildItemNode(Content content) { + TagNode itemNode = new TagNode("item"); + itemNode.addAttribute("href", content.getHref()); + itemNode.addAttribute("id", content.getId()); + itemNode.addAttribute("media-type", content.getMediaType()); + if (content.getProperties() != null) { + itemNode.addAttribute("properties", content.getProperties()); + } + if (content.hasFallBack()) { + itemNode.addAttribute("fallback", content.getFallBack().getId()); + } + return itemNode; + } + + /** + * Adds item ref tags for all Content objects that must be added to the + * spine. 
+ * + * The spine contains all the resources that will be shown when reading the + * book from start to end + * + * @param tagNode the HTML tagNode of the OPF template + * @param book the EpubBook + */ + private void addSpineTags(TagNode tagNode, EpubBook book) { + TagNode spineNode = tagNode.findElementByName("spine", true); + for (Content content : book.getContents()) { + if (content.isSpine()) { + spineNode.addChild(buildItemrefNode(content)); + } + } + } + + /** + * Builds an item ref tag from the Content object + * + * @param content + * @return + */ + private TagNode buildItemrefNode(Content content) { + TagNode itemNode = new TagNode("itemref"); + itemNode.addAttribute("idref", content.getId()); + if (!content.isLinear()) { + itemNode.addAttribute("linear", "no"); + } + return itemNode; + } + + /** + * Adds a ContentNode (value) with to a child element of the TagNode + * + * {value} + * + * @param tagNode + * @param elementName + * @param value + */ + private void addNodeData(TagNode tagNode, String elementName, String value) { + TagNode editNode = tagNode.findElementByName(elementName, true); + editNode.addChild(new ContentNode(value)); + } + + /** + * The base XML used for the OPF file. + * + * @return the OPF XML text + */ + public String getOpfXML() { + return opfXML; + } + + /** + * The base XML used for the OPF file. This is optional as there is a EPUB3 + * standard default but it can be overridden. 
+ * + * @param opfXML the OPF XML to set + */ + public void setOpfXML(String opfXML) { + this.opfXML = opfXML; + } + +} diff --git a/src/test/java/coza/opencollab/epub/creator/EpubCreatorTest.java b/src/test/java/coza/opencollab/epub/creator/EpubCreatorTest.java index 279990b..8b94571 100644 --- a/src/test/java/coza/opencollab/epub/creator/EpubCreatorTest.java +++ b/src/test/java/coza/opencollab/epub/creator/EpubCreatorTest.java @@ -1,48 +1,87 @@ -package coza.opencollab.epub.creator; - -import coza.opencollab.epub.creator.api.MetadataItem; -import coza.opencollab.epub.creator.model.EpubBook; -import java.io.File; -import java.io.FileOutputStream; -import junit.framework.Assert; -import org.apache.commons.io.IOUtils; -import org.junit.Test; - -/** - * - * @author OpenCollab - */ -public class EpubCreatorTest { - - @Test - public void testEpubCreate() { - try (FileOutputStream file = new FileOutputStream(new File("test.epub"))) { - EpubBook book = new EpubBook("en", "Samuel .-__Id1", "Samuel Test Book", "Samuel Holtzkampf"); - - MetadataItem.Builder builder = MetadataItem.builder(); - book.addMetadata(builder.name("dc:creator").value("Bob Smith")); - book.addMetadata(builder.name("meta") - .property("role").refines("#editor-id") - .value("Editor")); - - book.addContent(this.getClass().getResourceAsStream("/epub30-overview.xhtml"), - "application/xhtml+xml", "xhtml/epub30-overview.xhtml", true, true).setId("Overview"); - book.addContent(this.getClass().getResourceAsStream("/idpflogo_web_125.jpg"), - "image/jpeg", "img/idpflogo_web_125.jpg", false, false); - book.addContent(this.getClass().getResourceAsStream("/epub-spec.css"), - "text/css", "css/epub-spec.css", false, false); - book.addTextContent("TestHtml", "xhtml/samuelTest2.xhtml", "Samuel test one two four!!!!!\nTesting two").setToc(true); - book.addTextContent("TestHtml", "xhtml/samuelTest.xhtml", "Samuel test one two three\nTesting two").setToc(true); - 
book.addCoverImage(IOUtils.toByteArray(this.getClass().getResourceAsStream("/P1010832.jpg")), - "image/jpeg", "images/P1010832.jpg"); - - - book.writeToStream(file); - // TODO : real tests to see if document correct, this is just to test that creation is succesfull - Assert.assertEquals("test", "test"); - } catch (Exception ex) { - System.out.println(ex); - Assert.assertEquals("test", "test1"); - } - } -} +package coza.opencollab.epub.creator; + +import coza.opencollab.epub.creator.api.MetadataItem; +import coza.opencollab.epub.creator.model.EpubBook; +import lombok.SneakyThrows; +import lombok.val; +import org.apache.commons.io.IOUtils; +import org.assertj.core.api.WithAssertions; +import org.junit.Test; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStream; +import java.util.Scanner; +import java.util.zip.ZipFile; + +/** + * @author OpenCollab + */ +public class EpubCreatorTest implements WithAssertions { + + String author = "Samuel Holtzkampf"; + String modified = "modified-date-and-time"; + + @Test + public void bookHasAuthor() { + //when + val book = createEpubBook(); + //then + assertThat(book.getAuthor()).isEqualTo(author); + + } + + @Test + public void hasSetModifiedValue() { + //given + //TODO use a proper temp file + val file = new File("test.epub"); + writeBookToFile(createEpubBook(), file); + //when + String bookOpf = unzipFileEntry(file, "content/book.opf"); + //then + assertThat(bookOpf).containsOnlyOnce(""); + assertThat(bookOpf).contains(String.format("%s", modified)); + } + + @SneakyThrows + private String unzipFileEntry(File file, String name) { + val zipFile = new ZipFile(file); + val entry = zipFile.getEntry(name); + val inputStream = zipFile.getInputStream(entry); + try (Scanner scanner = new Scanner(inputStream)) { + return scanner.useDelimiter("\\A").next(); + } + } + + @SneakyThrows + private void writeBookToFile(EpubBook book, File file) { + try (OutputStream outputStream = new FileOutputStream(file)) { + 
book.writeToStream(outputStream); + } + } + + @SneakyThrows + private EpubBook createEpubBook() { + EpubBook book = new EpubBook("en", "Samuel .-__Id1", "Samuel Test Book", author); + + MetadataItem.Builder builder = MetadataItem.builder(); + book.addMetadata(builder.name("dc:creator").value("Bob Smith")); + book.addMetadata(builder.name("meta") + .property("role").refines("#editor-id") + .value("Editor")); + book.addMetadata((builder.name("meta").property("dcterms:modified").value(modified))); + + book.addContent(this.getClass().getResourceAsStream("/epub30-overview.xhtml"), + "application/xhtml+xml", "xhtml/epub30-overview.xhtml", true, true).setId("Overview"); + book.addContent(this.getClass().getResourceAsStream("/idpflogo_web_125.jpg"), + "image/jpeg", "img/idpflogo_web_125.jpg", false, false); + book.addContent(this.getClass().getResourceAsStream("/epub-spec.css"), + "text/css", "css/epub-spec.css", false, false); + book.addTextContent("TestHtml", "xhtml/samuelTest2.xhtml", "Samuel test one two four!!!!!\nTesting two").setToc(true); + book.addTextContent("TestHtml", "xhtml/samuelTest.xhtml", "Samuel test one two three\nTesting two").setToc(true); + book.addCoverImage(IOUtils.toByteArray(this.getClass().getResourceAsStream("/P1010832.jpg")), + "image/jpeg", "images/P1010832.jpg"); + return book; + } +}