From ca29c89ca8ed8f57028ae48f6c88f8542e44d8a2 Mon Sep 17 00:00:00 2001 From: MattGarrish Date: Fri, 8 Feb 2019 15:40:48 -0400 Subject: [PATCH] fix: allow foreign namespaces in SVG content documents Main changes: - switch to a master NVDL schema for SVG Content Document validation (in EPUB 3) - allow elements and attributes in foreign namespaces in SVG Content Documents, except where otherwise forbidden in the spec (i.e. in the `foreignObject` element, where only HTML is allowed) - allow `data-*` attributes in SVG (by removing them at parsing time) - add more tests Also: - slightly refactor the attribute pre-processing in XMLParser Fix #491 --- .../com/adobe/epubcheck/ops/OPSChecker.java | 2 +- .../com/adobe/epubcheck/xml/XMLParser.java | 55 ++++++++------- .../adobe/epubcheck/xml/XMLValidators.java | 1 + .../epubcheck/schema/30/epub-svg-30.nvdl | 63 +++++++++++++---- .../adobe/epubcheck/schema/30/epub-svg-30.sch | 1 + .../adobe/epubcheck/ops/OPSCheckerTest.java | 69 +++++++++++++++---- .../30/single/svg/invalid/duplicate-ids.svg | 7 ++ ...reignObject-invalid-requiredExtensions.svg | 10 +++ .../foreignObject-non-html-content.svg | 12 ++++ .../svg/invalid/foreignObject-two-body.svg | 11 +++ .../resources/30/single/svg/invalid/rect.svg | 23 ------- .../30/single/svg/valid/custom-ns.svg | 9 +++ .../30/single/svg/valid/data-attribute.svg | 7 ++ .../30/single/svg/valid/foreignObject.svg | 10 +++ .../resources/30/single/svg/valid/rect.svg | 18 ----- 15 files changed, 201 insertions(+), 97 deletions(-) create mode 100644 src/test/resources/30/single/svg/invalid/duplicate-ids.svg create mode 100644 src/test/resources/30/single/svg/invalid/foreignObject-invalid-requiredExtensions.svg create mode 100644 src/test/resources/30/single/svg/invalid/foreignObject-non-html-content.svg create mode 100644 src/test/resources/30/single/svg/invalid/foreignObject-two-body.svg delete mode 100644 src/test/resources/30/single/svg/invalid/rect.svg create mode 100644 src/test/resources/30/single/svg/valid/custom-ns.svg create mode 100644 src/test/resources/30/single/svg/valid/data-attribute.svg create mode 100644 src/test/resources/30/single/svg/valid/foreignObject.svg delete mode 100644 src/test/resources/30/single/svg/valid/rect.svg diff --git a/src/main/java/com/adobe/epubcheck/ops/OPSChecker.java b/src/main/java/com/adobe/epubcheck/ops/OPSChecker.java index 451ae80ec..24919679e 100755 --- a/src/main/java/com/adobe/epubcheck/ops/OPSChecker.java +++ b/src/main/java/com/adobe/epubcheck/ops/OPSChecker.java @@ -58,7 +58,7 @@ public class OPSChecker implements ContentChecker, DocumentValidator .putAll(Predicates.and(mimetype("image/svg+xml"), version(EPUBVersion.VERSION_2)), XMLValidators.SVG_20_RNG, XMLValidators.IDUNIQUE_20_SCH) .putAll(Predicates.and(mimetype("image/svg+xml"), version(EPUBVersion.VERSION_3)), - XMLValidators.SVG_30_RNC, XMLValidators.SVG_30_SCH) + XMLValidators.SVG_30_NVDL) .putAll( and(or(profile(EPUBProfile.DICT), hasPubType(OPFData.DC_TYPE_DICT)), mimetype("application/xhtml+xml"), version(EPUBVersion.VERSION_3)), diff --git a/src/main/java/com/adobe/epubcheck/xml/XMLParser.java b/src/main/java/com/adobe/epubcheck/xml/XMLParser.java index 3e9e385ce..ce101216a 100755 --- a/src/main/java/com/adobe/epubcheck/xml/XMLParser.java +++ b/src/main/java/com/adobe/epubcheck/xml/XMLParser.java @@ -60,6 +60,7 @@ import com.adobe.epubcheck.opf.ValidationContext; import com.adobe.epubcheck.util.EPUBVersion; import com.adobe.epubcheck.util.ResourceUtil; +import com.google.common.collect.ImmutableSet; import com.google.common.io.Closer; import com.thaiopensource.util.PropertyMapBuilder; import com.thaiopensource.validate.ValidateProperty; @@ -527,59 +528,57 @@ public void startElement(String namespaceURI, String localName, String qName, } } - // 3.0.1 custom attributes handling - private static final Set knownXHTMLContentDocsNamespaces = new HashSet(); - static - { - knownXHTMLContentDocsNamespaces.add(Namespaces.MATHML); - knownXHTMLContentDocsNamespaces.add(Namespaces.OPS); - knownXHTMLContentDocsNamespaces.add(Namespaces.SSML); - knownXHTMLContentDocsNamespaces.add(Namespaces.SVG); - knownXHTMLContentDocsNamespaces.add(Namespaces.XHTML); - knownXHTMLContentDocsNamespaces.add(Namespaces.XMLEVENTS); - knownXHTMLContentDocsNamespaces.add(Namespaces.XML); - knownXHTMLContentDocsNamespaces.add(Namespaces.XLINK); - } - private Attributes preprocessAttributes(String elemNamespace, String elemName, String elemQName, Attributes originalAttribs) { AttributesImpl attributes = new AttributesImpl(originalAttribs); - if ("application/xhtml+xml".equals(context.mimeType) - && context.version == EPUBVersion.VERSION_3) + try { - try + for (int i = attributes.getLength() - 1; i >= 0; i--) { - for (int i = attributes.getLength() - 1; i >= 0; i--) + if (context.version == EPUBVersion.VERSION_3) { - if (isDataAttribute(attributes, i) || isCustomNamespaceAttribute(attributes, i)) + // Remove data-* attributes in both XHTML and SVG + if (isDataAttribute(attributes, i)) { attributes.removeAttribute(i); } - else if (Namespaces.XHTML.equals(elemNamespace) + // Remove custom namespace attributes in XHTML + else if ("application/xhtml+xml".equals(context.mimeType) + && isHTMLCustomNamespace(attributes.getURI(i))) + { + attributes.removeAttribute(i); + } + // Normalize case of case-insensitive attributes in XHTML + else if ("application/xhtml+xml".equals(context.mimeType) + && Namespaces.XHTML.equals(elemNamespace) && isCaseInsensitiveAttribute(attributes, i)) { attributes.setValue(i, attributes.getValue(i).toLowerCase(Locale.ENGLISH)); } } - } catch (Exception e) - { - throw new IllegalStateException("data-* removal exception", e); } + } catch (Exception e) + { + throw new IllegalStateException("Unexpected error when pre-processing attributes", e); } return attributes; } private static boolean isDataAttribute(Attributes attributes, int index) { - return attributes.getLocalName(index).startsWith("data-"); + return "".equals(attributes.getURI(index)) + && attributes.getLocalName(index).startsWith("data-"); } - private boolean isCustomNamespaceAttribute(Attributes attributes, int index) + private static final Set KNOWN_XHTML_NAMESPACES = ImmutableSet.of(Namespaces.XHTML, + Namespaces.XML, Namespaces.OPS, Namespaces.SVG, Namespaces.MATHML, Namespaces.SSML, + Namespaces.XMLEVENTS, Namespaces.XLINK); + + private static boolean isHTMLCustomNamespace(String namespace) { - String ns = attributes.getURI(index); - return (ns != null && ns.trim().length() > 0 - && !knownXHTMLContentDocsNamespaces.contains(ns.trim())); + if (namespace == null || namespace.trim().isEmpty()) return false; + return !KNOWN_XHTML_NAMESPACES.contains(namespace.trim()); } private static boolean isCaseInsensitiveAttribute(Attributes attributes, int index) diff --git a/src/main/java/com/adobe/epubcheck/xml/XMLValidators.java b/src/main/java/com/adobe/epubcheck/xml/XMLValidators.java index 064855b05..676ab990d 100644 --- a/src/main/java/com/adobe/epubcheck/xml/XMLValidators.java +++ b/src/main/java/com/adobe/epubcheck/xml/XMLValidators.java @@ -37,6 +37,7 @@ public enum XMLValidators SIG_30_RNC("schema/30/ocf-signatures-30.rnc"), SVG_20_RNG("schema/20/rng/svg11.rng"), SVG_30_RNC("schema/30/epub-svg-30.rnc"), + SVG_30_NVDL("schema/30/epub-svg-30.nvdl"), SVG_30_SCH("schema/30/epub-svg-30.sch"), XHTML_20_NVDL("schema/20/rng/ops20.nvdl"), XHTML_20_SCH("schema/20/sch/xhtml.sch"), diff --git a/src/main/resources/com/adobe/epubcheck/schema/30/epub-svg-30.nvdl b/src/main/resources/com/adobe/epubcheck/schema/30/epub-svg-30.nvdl index 6e1ef8185..026d153a3 100644 --- a/src/main/resources/com/adobe/epubcheck/schema/30/epub-svg-30.nvdl +++ b/src/main/resources/com/adobe/epubcheck/schema/30/epub-svg-30.nvdl @@ -1,15 +1,54 @@ - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/main/resources/com/adobe/epubcheck/schema/30/epub-svg-30.sch b/src/main/resources/com/adobe/epubcheck/schema/30/epub-svg-30.sch index af3c91f47..10ad603a4 100644 --- a/src/main/resources/com/adobe/epubcheck/schema/30/epub-svg-30.sch +++ b/src/main/resources/com/adobe/epubcheck/schema/30/epub-svg-30.sch @@ -3,6 +3,7 @@ + diff --git a/src/test/java/com/adobe/epubcheck/ops/OPSCheckerTest.java b/src/test/java/com/adobe/epubcheck/ops/OPSCheckerTest.java index fd800a35f..43ec95399 100644 --- a/src/test/java/com/adobe/epubcheck/ops/OPSCheckerTest.java +++ b/src/test/java/com/adobe/epubcheck/ops/OPSCheckerTest.java @@ -159,21 +159,7 @@ public void setup() expectedFatals.clear(); expectedUsage.clear(); } - - @Test - public void testValidateSVGRectInvalid() - { - Collections.addAll(expectedErrors, MessageId.RSC_005, MessageId.RSC_005, MessageId.RSC_005, - MessageId.RSC_005); - testValidateDocument("svg/invalid/rect.svg", "image/svg+xml", EPUBVersion.VERSION_3); - } - - @Test - public void testValidateSVGRectValid() - { - testValidateDocument("svg/valid/rect.svg", "image/svg+xml", EPUBVersion.VERSION_3); - } - + @Test public void testValidateXHTMLEdits001() { @@ -706,6 +692,18 @@ public void testValidateSVG_ValidStyleWithoutType_issue688() testValidateDocument("svg/valid/issue688.svg", "image/svg+xml", EPUBVersion.VERSION_3); } + @Test + public void testValidateSVG_WithDataAttribute() + { + testValidateDocument("svg/valid/data-attribute.svg", "image/svg+xml", EPUBVersion.VERSION_3); + } + + @Test + public void testValidateSVG_WithCustomNamespace() + { + testValidateDocument("svg/valid/custom-ns.svg", "image/svg+xml", EPUBVersion.VERSION_3); + } + @Test public void testValidateSVG_Links_MisssingTitle() { @@ -713,6 +711,47 @@ public void testValidateSVG_Links_MisssingTitle() testValidateDocument("svg/invalid/svg-links.svg", "image/svg+xml", EPUBVersion.VERSION_3); } + @Test + public void testValidateSVG_ForeignObject() + { + // tests that 'foreignObject' conforming to the rules is accepted + testValidateDocument("svg/valid/foreignObject.svg", "image/svg+xml", EPUBVersion.VERSION_3); + } + + @Test + public void testValidateSVG_ForeignObjectWithInvalidRequiredExtensions() + { + // tests that 'foreignObject' with a 'requiredExtensions' attribute other than the OPS NS is invalid + expectedErrors.add(MessageId.RSC_005); + testValidateDocument("svg/invalid/foreignObject-invalid-requiredExtensions.svg", "image/svg+xml", EPUBVersion.VERSION_3); + } + + @Test + public void testValidateSVG_ForeignObjectWithNonXHTMLContent() + { + // tests that 'foreignObject' can't have children that are not HTML content + expectedErrors.add(MessageId.RSC_005); + testValidateDocument("svg/invalid/foreignObject-non-html-content.svg", "image/svg+xml", EPUBVersion.VERSION_3); + } + + @Test + public void testValidateSVG_ForeignObjectWithTwoBodyElements() + { + // tests that 'foreignObject' can't have children that are not HTML content + expectedErrors.add(MessageId.RSC_005); + testValidateDocument("svg/invalid/foreignObject-two-body.svg", "image/svg+xml", EPUBVersion.VERSION_3); + } + + + @Test + public void testValidateSVG_DuplicateIds() + { + // tests that duplicate IDs are detected + Collections.addAll(expectedErrors, MessageId.RSC_005, MessageId.RSC_005); + testValidateDocument("svg/invalid/duplicate-ids.svg", "image/svg+xml", EPUBVersion.VERSION_3); + } + + @Test public void testValidateXHTMLIssue204() { diff --git a/src/test/resources/30/single/svg/invalid/duplicate-ids.svg b/src/test/resources/30/single/svg/invalid/duplicate-ids.svg new file mode 100644 index 000000000..152074e42 --- /dev/null +++ b/src/test/resources/30/single/svg/invalid/duplicate-ids.svg @@ -0,0 +1,7 @@ + + + Test + Rectangle + + + diff --git a/src/test/resources/30/single/svg/invalid/foreignObject-invalid-requiredExtensions.svg b/src/test/resources/30/single/svg/invalid/foreignObject-invalid-requiredExtensions.svg new file mode 100644 index 000000000..f9dfec4ba --- /dev/null +++ b/src/test/resources/30/single/svg/invalid/foreignObject-invalid-requiredExtensions.svg @@ -0,0 +1,10 @@ + + + Test + Rectangle + + + hello + + diff --git a/src/test/resources/30/single/svg/invalid/foreignObject-non-html-content.svg b/src/test/resources/30/single/svg/invalid/foreignObject-non-html-content.svg new file mode 100644 index 000000000..ea40a2302 --- /dev/null +++ b/src/test/resources/30/single/svg/invalid/foreignObject-non-html-content.svg @@ -0,0 +1,12 @@ + + + Test + Rectangle + + + hello + hello + + diff --git a/src/test/resources/30/single/svg/invalid/foreignObject-two-body.svg b/src/test/resources/30/single/svg/invalid/foreignObject-two-body.svg new file mode 100644 index 000000000..8c1cbaf2f --- /dev/null +++ b/src/test/resources/30/single/svg/invalid/foreignObject-two-body.svg @@ -0,0 +1,11 @@ + + + Test + Rectangle + + + + + + diff --git a/src/test/resources/30/single/svg/invalid/rect.svg b/src/test/resources/30/single/svg/invalid/rect.svg deleted file mode 100644 index d7bbb2d9b..000000000 --- a/src/test/resources/30/single/svg/invalid/rect.svg +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - Title - Example rect01 - rectangle with sharp corners - - - - - - - - - - - \ No newline at end of file diff --git a/src/test/resources/30/single/svg/valid/custom-ns.svg b/src/test/resources/30/single/svg/valid/custom-ns.svg new file mode 100644 index 000000000..dd212c467 --- /dev/null +++ b/src/test/resources/30/single/svg/valid/custom-ns.svg @@ -0,0 +1,9 @@ + + + Test + Rectangle + + bar + + + diff --git a/src/test/resources/30/single/svg/valid/data-attribute.svg b/src/test/resources/30/single/svg/valid/data-attribute.svg new file mode 100644 index 000000000..1a4161770 --- /dev/null +++ b/src/test/resources/30/single/svg/valid/data-attribute.svg @@ -0,0 +1,7 @@ + + + Test + Rectangle + + diff --git a/src/test/resources/30/single/svg/valid/foreignObject.svg b/src/test/resources/30/single/svg/valid/foreignObject.svg new file mode 100644 index 000000000..720c4fcef --- /dev/null +++ b/src/test/resources/30/single/svg/valid/foreignObject.svg @@ -0,0 +1,10 @@ + + + Test + Rectangle + + + hello + + diff --git a/src/test/resources/30/single/svg/valid/rect.svg b/src/test/resources/30/single/svg/valid/rect.svg deleted file mode 100644 index 960f4f513..000000000 --- a/src/test/resources/30/single/svg/valid/rect.svg +++ /dev/null @@ -1,18 +0,0 @@ - - - - - Example rect01 - rectangle with sharp corners - - - - - - - - - \ No newline at end of file