From 0d6f927acf88407f3e26c95c7eb0725379c8878f Mon Sep 17 00:00:00 2001 From: Romain Deltour Date: Mon, 14 Nov 2022 22:20:19 +0100 Subject: [PATCH] feat: update the reporting of file encoding issues This commit changes the errors and warnings reported when EPUBCheck detects an invalid or non-recommended file encoding. The following error codes are now reported: - `RSC-027` (new): warning reported for XML documents encoded in UTF-16 - `RSC-028` (new): error reported for XML documents in an invalid encoding - `HTM-058` (new): error reported for XHTML encoded in UTF-16 (the HTML standard has an authoring requirement for UTF-8) - `CSS-003` (updated): warning reported for CSS encoded in UTF-16 - `CSS-004` (updated): error reported for CSS in an invalid encoding Note: previously both `CSS-003` and `CSS-004` were errors, reported for a disallowed encoding. The only difference is that one was reported when the encoding was detected from a BOM, the other from a `@charset` declaration. This commit repurposes `CSS-003` as the warning raised for UTF-16, and `CSS-004` as the disallowed-encoding error. 
Fixes #1245 --- .../com/adobe/epubcheck/css/CSSChecker.java | 21 +++++-- .../epubcheck/messages/DefaultSeverities.java | 5 +- .../adobe/epubcheck/messages/MessageId.java | 3 + ...ngSniffer.java => XMLEncodingSniffer.java} | 4 +- .../com/adobe/epubcheck/xml/XMLParser.java | 30 +++++++-- .../messages/MessageBundle.properties | 13 ++-- .../xml-encoding-latin1-declaration-error.opf | 16 +++++ .../xml-encoding-unknown-declared-error.opf | 16 +++++ ...utf16-BOM-and-utf8-declaration-warning.opf | Bin 0 -> 1314 bytes ...oding-utf16-BOM-no-declaration-warning.opf | Bin 0 -> 1236 bytes .../xml-encoding-utf16-declared-warning.opf | Bin 0 -> 1316 bytes .../files/xml-encoding-utf32-BOM-error.opf | Bin 0 -> 2468 bytes .../files/xml-encoding-utf8-BOM-valid.opf | 16 +++++ .../xml-encoding-utf8-declared-valid.opf | 16 +++++ ...xml-encoding-utf8-no-declaration-valid.opf | 15 +++++ .../epub3/03-resources/resources.feature | 58 +++++++++++++++++- .../content-document-css.feature | 23 +++++-- .../content-document-xhtml.feature | 7 +++ .../EPUB/content_001.xhtml | 0 .../EPUB/nav.xhtml | 0 .../EPUB/package.opf | 0 .../EPUB/style.css | 2 +- .../META-INF/container.xml | 0 .../mimetype | 0 .../EPUB/content_001.xhtml | 0 .../EPUB/nav.xhtml | 0 .../EPUB/package.opf | 0 .../EPUB/style.css | Bin 0 -> 108 bytes .../META-INF/container.xml | 0 .../mimetype | 0 .../EPUB/content_001.xhtml | 12 ++++ .../EPUB/nav.xhtml | 14 +++++ .../EPUB/package.opf | 17 +++++ .../EPUB/style.css | Bin 0 -> 64 bytes .../META-INF/container.xml | 6 ++ .../mimetype | 1 + .../EPUB/content_001.xhtml | 12 ++++ .../EPUB/nav.xhtml | 14 +++++ .../EPUB/package.opf | 17 +++++ .../EPUB/style.css | 0 .../META-INF/container.xml | 6 ++ .../mimetype | 1 + .../files/encoding-utf16-error.xhtml | Bin 0 -> 408 bytes 43 files changed, 318 insertions(+), 27 deletions(-) rename src/main/java/com/adobe/epubcheck/xml/{EncodingSniffer.java => XMLEncodingSniffer.java} (97%) create mode 100644 
src/test/resources/epub3/03-resources/files/xml-encoding-latin1-declaration-error.opf create mode 100644 src/test/resources/epub3/03-resources/files/xml-encoding-unknown-declared-error.opf create mode 100644 src/test/resources/epub3/03-resources/files/xml-encoding-utf16-BOM-and-utf8-declaration-warning.opf create mode 100644 src/test/resources/epub3/03-resources/files/xml-encoding-utf16-BOM-no-declaration-warning.opf create mode 100644 src/test/resources/epub3/03-resources/files/xml-encoding-utf16-declared-warning.opf create mode 100644 src/test/resources/epub3/03-resources/files/xml-encoding-utf32-BOM-error.opf create mode 100644 src/test/resources/epub3/03-resources/files/xml-encoding-utf8-BOM-valid.opf create mode 100644 src/test/resources/epub3/03-resources/files/xml-encoding-utf8-declared-valid.opf create mode 100644 src/test/resources/epub3/03-resources/files/xml-encoding-utf8-no-declaration-valid.opf rename src/test/resources/epub3/06-content-document/files/{content-css-charset-enc-error => content-css-encoding-latin1-error}/EPUB/content_001.xhtml (100%) rename src/test/resources/epub3/06-content-document/files/{content-css-charset-enc-error => content-css-encoding-latin1-error}/EPUB/nav.xhtml (100%) rename src/test/resources/epub3/06-content-document/files/{content-css-charset-enc-error => content-css-encoding-latin1-error}/EPUB/package.opf (100%) rename src/test/resources/epub3/06-content-document/files/{content-css-charset-enc-error => content-css-encoding-latin1-error}/EPUB/style.css (84%) rename src/test/resources/epub3/06-content-document/files/{content-css-charset-enc-error => content-css-encoding-latin1-error}/META-INF/container.xml (100%) rename src/test/resources/epub3/06-content-document/files/{content-css-charset-enc-error => content-css-encoding-latin1-error}/mimetype (100%) rename src/test/resources/epub3/06-content-document/files/{content-css-charset-utf8-valid => content-css-encoding-utf16-declared-warning}/EPUB/content_001.xhtml (100%) 
rename src/test/resources/epub3/06-content-document/files/{content-css-charset-utf8-valid => content-css-encoding-utf16-declared-warning}/EPUB/nav.xhtml (100%) rename src/test/resources/epub3/06-content-document/files/{content-css-charset-utf8-valid => content-css-encoding-utf16-declared-warning}/EPUB/package.opf (100%) create mode 100644 src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-declared-warning/EPUB/style.css rename src/test/resources/epub3/06-content-document/files/{content-css-charset-utf8-valid => content-css-encoding-utf16-declared-warning}/META-INF/container.xml (100%) rename src/test/resources/epub3/06-content-document/files/{content-css-charset-utf8-valid => content-css-encoding-utf16-declared-warning}/mimetype (100%) create mode 100644 src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/content_001.xhtml create mode 100644 src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/nav.xhtml create mode 100644 src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/package.opf create mode 100644 src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/style.css create mode 100644 src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/META-INF/container.xml create mode 100644 src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/mimetype create mode 100644 src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/content_001.xhtml create mode 100644 src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/nav.xhtml create mode 100644 src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/package.opf rename 
src/test/resources/epub3/06-content-document/files/{content-css-charset-utf8-valid => content-css-encoding-utf8-declared-valid}/EPUB/style.css (100%) create mode 100644 src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/META-INF/container.xml create mode 100644 src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/mimetype create mode 100644 src/test/resources/epub3/06-content-document/files/encoding-utf16-error.xhtml diff --git a/src/main/java/com/adobe/epubcheck/css/CSSChecker.java b/src/main/java/com/adobe/epubcheck/css/CSSChecker.java index bcf26a913..5f64da66a 100644 --- a/src/main/java/com/adobe/epubcheck/css/CSSChecker.java +++ b/src/main/java/com/adobe/epubcheck/css/CSSChecker.java @@ -138,17 +138,26 @@ CssSource getCssSource() if (source.getInputStream().getBomCharset().isPresent()) { charset = source.getInputStream().getBomCharset().get().toLowerCase(Locale.ROOT); - if (!charset.equals("utf-8") && !charset.startsWith("utf-16")) + if (!charset.equals("utf-8")) { - report.message(MessageId.CSS_004, EPUBLocation.of(context), charset); + if (charset.startsWith("utf-16")) + { + report.message(MessageId.CSS_003, EPUBLocation.of(context), charset); + } else { + report.message(MessageId.CSS_004, EPUBLocation.of(context), charset); + } } - } - if (source.getInputStream().getCssCharset().isPresent()) + } else if (source.getInputStream().getCssCharset().isPresent()) { charset = source.getInputStream().getCssCharset().get().toLowerCase(Locale.ROOT); - if (!charset.equals("utf-8") && !charset.startsWith("utf-16")) + if (!charset.equals("utf-8")) { - report.message(MessageId.CSS_003, EPUBLocation.of(context), charset); + if (charset.startsWith("utf-16")) + { + report.message(MessageId.CSS_003, EPUBLocation.of(context), charset); + } else { + report.message(MessageId.CSS_004, EPUBLocation.of(context), charset); + } } } } diff --git 
a/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java b/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java index b6ad8ffd1..10b2529ec 100644 --- a/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java +++ b/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java @@ -69,7 +69,7 @@ private void initialize() // CSS severities.put(MessageId.CSS_001, Severity.ERROR); severities.put(MessageId.CSS_002, Severity.ERROR); - severities.put(MessageId.CSS_003, Severity.ERROR); + severities.put(MessageId.CSS_003, Severity.WARNING); severities.put(MessageId.CSS_004, Severity.ERROR); severities.put(MessageId.CSS_005, Severity.ERROR); severities.put(MessageId.CSS_006, Severity.USAGE); @@ -141,6 +141,7 @@ private void initialize() severities.put(MessageId.HTM_055, Severity.WARNING); severities.put(MessageId.HTM_056, Severity.ERROR); severities.put(MessageId.HTM_057, Severity.ERROR); + severities.put(MessageId.HTM_058, Severity.ERROR); // Media severities.put(MessageId.MED_001, Severity.ERROR); @@ -337,6 +338,8 @@ private void initialize() severities.put(MessageId.RSC_024, Severity.USAGE); severities.put(MessageId.RSC_025, Severity.USAGE); severities.put(MessageId.RSC_026, Severity.ERROR); + severities.put(MessageId.RSC_027, Severity.WARNING); + severities.put(MessageId.RSC_028, Severity.ERROR); // Scripting severities.put(MessageId.SCP_001, Severity.SUPPRESSED); // checking scripts is out of scope diff --git a/src/main/java/com/adobe/epubcheck/messages/MessageId.java b/src/main/java/com/adobe/epubcheck/messages/MessageId.java index 7724bb5d4..031bcc044 100644 --- a/src/main/java/com/adobe/epubcheck/messages/MessageId.java +++ b/src/main/java/com/adobe/epubcheck/messages/MessageId.java @@ -135,6 +135,7 @@ public enum MessageId implements Comparable HTM_055("HTM_055"), HTM_056("HTM_056"), HTM_057("HTM_057"), + HTM_058("HTM_058"), // Messages associated with media (images, audio and video) MED_001("MED-001"), @@ -331,6 +332,8 @@ public 
enum MessageId implements Comparable RSC_024("RSC-024"), RSC_025("RSC-025"), RSC_026("RSC-026"), + RSC_027("RSC-027"), + RSC_028("RSC-028"), // Messages relating to scripting SCP_001("SCP-001"), diff --git a/src/main/java/com/adobe/epubcheck/xml/EncodingSniffer.java b/src/main/java/com/adobe/epubcheck/xml/XMLEncodingSniffer.java similarity index 97% rename from src/main/java/com/adobe/epubcheck/xml/EncodingSniffer.java rename to src/main/java/com/adobe/epubcheck/xml/XMLEncodingSniffer.java index e45911288..bb4b584f1 100644 --- a/src/main/java/com/adobe/epubcheck/xml/EncodingSniffer.java +++ b/src/main/java/com/adobe/epubcheck/xml/XMLEncodingSniffer.java @@ -4,7 +4,7 @@ import java.io.InputStream; import java.util.Locale; -public final class EncodingSniffer +public final class XMLEncodingSniffer { private static final byte[][] UTF16_MAGIC = { { (byte) 0xFE, (byte) 0xFF }, @@ -108,7 +108,7 @@ public static String sniffEncoding(InputStream in) return encoding.toUpperCase(Locale.ROOT); } - private EncodingSniffer() + private XMLEncodingSniffer() { // Not instanciable. 
} diff --git a/src/main/java/com/adobe/epubcheck/xml/XMLParser.java b/src/main/java/com/adobe/epubcheck/xml/XMLParser.java index 6fd924096..4b01d0261 100755 --- a/src/main/java/com/adobe/epubcheck/xml/XMLParser.java +++ b/src/main/java/com/adobe/epubcheck/xml/XMLParser.java @@ -29,6 +29,7 @@ import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; +import org.w3c.epubcheck.constants.MIMEType; import org.xml.sax.ContentHandler; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; @@ -135,13 +136,33 @@ public void process() } // Check encoding - String encoding = EncodingSniffer.sniffEncoding(buffered); - if (encoding != null && !encoding.equals("UTF-8") && !encoding.equals("UTF-16")) + // If the result is null, the XML parser must parse it as UTF-8 + String encoding = XMLEncodingSniffer.sniffEncoding(buffered); + if (encoding != null && !encoding.equals("UTF-8")) { - report.message(MessageId.CSS_003, EPUBLocation.of(context), encoding); + if (encoding.equals("UTF-16")) + { + // XHTML requires UTF-8, UTF-16 is reported as an error + if (MIMEType.XHTML.is(context.mimeType)) + { + report.message(MessageId.HTM_058, EPUBLocation.of(context)); + } + // For other XML types, UTF-16 is reported as a warning + else + { + report.message(MessageId.RSC_027, EPUBLocation.of(context)); + } + } + else + { + report.message(MessageId.RSC_028, EPUBLocation.of(context), encoding); + } } // Build the input source + // We do not set the source encoding name, but instead let the SAXParser + // apply its own encoding-sniffing logic, as it can report useful errors + // (for instance a mismatch between a BOM and the XML declaration) InputSource source = new InputSource(buffered); source.setSystemId(url.toString()); @@ -163,7 +184,8 @@ public void process() } catch (SAXException e) { // All errors should have already been reported by the error handler - if (report.getFatalErrorCount() == 0) { + if (report.getFatalErrorCount() == 0) + { 
report.message(MessageId.RSC_016, EPUBLocation.of(context), e.getMessage()); } } diff --git a/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties b/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties index 0a78f94c6..3cef47ca3 100644 --- a/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties +++ b/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties @@ -40,8 +40,8 @@ CHK_008=Error encountered while processing an item "%1$s"; skip other checks for #CSS CSS_001=The "%1$s" property must not be included in an EPUB Style Sheet. CSS_002=Empty or NULL reference found. -CSS_003=Only UTF-8 and UTF-16 encodings are allowed, detected %1$s. -CSS_004=Only UTF-8 and UTF-16 encodings are allowed, detected %1$s BOM. +CSS_003=CSS document is encoded in UTF-16. It should be encoded in UTF-8 instead. +CSS_004=CSS documents must be encoded in UTF-8, detected %1$s. CSS_005=Conflicting alternate style attributes found: %1$s. CSS_006=CSS selector specifies fixed position. CSS_007=Font-face reference "%1$s" refers to non-standard font type "%2$s". @@ -68,7 +68,7 @@ CSS_025=CSS class Selector could not be found. CSS_025_SUG=Check for typos or define a class selector to document the use of the class. CSS_028=Use of Font-face declaration. CSS_029=Found CSS class name "%1$s" but no "%2$s" property was declared in the package document. -CSS_030=The package document declares media overlays styling class names but no CSS was found in the content document. +CSS_030=The package document declares media overlays styling class names but no CSS was found in the content document. #HTM - XHTML related messages HTM_001=Any publication resource that is an XML-based media type must be a valid XML 1.0 document. XML version found: %1$s. @@ -125,7 +125,8 @@ HTM_053=Found an external file link (file://) in file: "%1$s". HTM_054=Custom attribute namespace ("%1$s") must not include the string "%2$s" in its domain. 
HTM_055=The "%1$s" element should not be used (discouraged construct) HTM_056=Viewport metadata has no "%1$s" dimension (both "width" and "height" properties are required) -HTM_057=Viewport "%1$s" value must be a positive number or the keyword "device-%1$s" +HTM_057=Viewport "%1$s" value must be a positive number or the keyword "device-%1$s" +HTM_058=HTML documents must be encoded in UTF-8, but UTF-16 was detected. #media MED_001=Video poster must have core media image type. @@ -346,7 +347,9 @@ RSC_022=Cannot check image details (requires Java version 7 or higher). RSC_023=Couldn’t parse host of URL "%1$s" (probably due to disallowed characters or missing slashes after the protocol) RSC_024=Informative parsing warning: %1$s RSC_025=Informative parsing error: %1$s -RSC_026=URL "%1$s" leaks outside the container (it is not a valid-relative-ocf-URL-with-fragment string) +RSC_026=URL "%1$s" leaks outside the container (it is not a valid-relative-ocf-URL-with-fragment string) +RSC_027=XML document is encoded in UTF-16. It should be encoded in UTF-8 instead. +RSC_028=XML documents must be encoded in UTF-8, but %1$s was detected. #Scripting SCP_001=Use of Javascript eval() function in EPUB scripts is a security risk. 
diff --git a/src/test/resources/epub3/03-resources/files/xml-encoding-latin1-declaration-error.opf b/src/test/resources/epub3/03-resources/files/xml-encoding-latin1-declaration-error.opf new file mode 100644 index 000000000..4eebba7ad --- /dev/null +++ b/src/test/resources/epub3/03-resources/files/xml-encoding-latin1-declaration-error.opf @@ -0,0 +1,16 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/epub3/03-resources/files/xml-encoding-unknown-declared-error.opf b/src/test/resources/epub3/03-resources/files/xml-encoding-unknown-declared-error.opf new file mode 100644 index 000000000..636745986 --- /dev/null +++ b/src/test/resources/epub3/03-resources/files/xml-encoding-unknown-declared-error.opf @@ -0,0 +1,16 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/epub3/03-resources/files/xml-encoding-utf16-BOM-and-utf8-declaration-warning.opf b/src/test/resources/epub3/03-resources/files/xml-encoding-utf16-BOM-and-utf8-declaration-warning.opf new file mode 100644 index 0000000000000000000000000000000000000000..21038c0539334e0ecf600369d5cc78410e3fceed GIT binary patch literal 1314 zcmcJPPfy!G6vXGur?9vp5=>EBS#FRjK6Zt7vm3__vBase z*1O()e`emi^~-avQ$1;}J0*J5L`$tS(?W&5XkV#%I$};ZDRj;LnA1Yja5~g4UFcAs zu{hO0d&*U6gxw$BQ+$$j?1HF{cdLr0RHi=9-|+N^IA-sLTnq9~IXNclkv_3ngcBp! 
zMfYKYE;7hqG1{OrV*5QX&vef`0jqQQq+)f$Fk>UTXG8{b_A26x8J^o5GIp;!n3P0c zlD%PIyFQWb7>?+SD|TGblsRTgh@OX-SwK_;`y`+=Dx-{hw*StTxy(Ujz0P&1p}w+; zmB@+fyzN-#)Xq{@=z$nkXb`Y9p1bGU{$2HJ@Wv|s&`*8S8O&-tU0w~cy5b31FN5M% zrP9ApX)~%oxrFx$Zu4~)M)k&_lJ1w3emP|R$UD^uz3CpxHt$rw1EM-xRJ$M4Y3p5g zfjh}PG9hafdL!1+idfTzweN-XjVfP-j48Nme+%vjQLJXSU5`8Bd*M51ALH{a8^Oah zDEYSch*gQrj5zMhcuV^&?}PPi+o$eLb$9U=_&;{wi~Iir&s$}E@8Tb!#u5>`W8B?u gE1zRs!nnY1mszZE)Z@KsyTVPb?WoSa{r?*16~gA(RsaA1 literal 0 HcmV?d00001 diff --git a/src/test/resources/epub3/03-resources/files/xml-encoding-utf16-BOM-no-declaration-warning.opf b/src/test/resources/epub3/03-resources/files/xml-encoding-utf16-BOM-no-declaration-warning.opf new file mode 100644 index 0000000000000000000000000000000000000000..eda2ad5e025217c928fb599747c9fcd280c2976c GIT binary patch literal 1236 zcmcJPPfy!G5XIk_PZ8ych+|q6DN1vw%7FuI5AB5uiV_1+lVFUY?U!%+d$X*Ze-bBT zt#`dU^XARGS^xc~v1&~<(@);Fs?^hOt+doykNT*-?$q+s8mZv<70*Bm&Dp!rK=*3! zD|D(M&l#)xIO&7*3|<4yg1vR1y~AS&YK}4@(v-8?IIURu?1_1y2j)sA5z8kns~d(f z&Fq0O(GeM}*lWR=GdyQ>=P+V-b|8})^oHnN2DU3eS~862h>z@)*p$pES`YMbV2*-O z8}_|mX;x;L_H2L2mvdRcYKiB$zUhZPu}YN~gX_4PSk7rPO0Lio7*=Q!+&a(w^KE}? 
zwOhZAZ>r)Kebt3NlUe3-$g5+PE8auvCKR_St^P!%ol#57hP@yl)NQ}?dA z`+V25q`PB*yL-clU1P$5_{7yvA=ovsB@v$7|Jgg}YeWsE)q< H|C;;uqnO|{Zmb1hZqL;4$9fGiLieGSBtEJO!uh3;drt@WuW2!1G#J-@EunsIf%E?ihFX h>&oX?moP5y+h-Ol9QAms+OBYuYd5O1@BY7rdI73h!KdI$@IAO2Tn^p`KL*sf9qbSM&RXnc z{~>rChX(ORCZEnnTk;8NfY_$y8>K76wFdC7j}jvL)Hh;?_Z zf7+e!5qG}P&Y#UYXg&9VlQ?TR>W!EFVy*@1SUu0$mOKNEt&4L#vbO$`eZ8!fZE>yt zXvE_q4?mu%w*(J@r@`~!Ua$o7OyGp?S-g5r@?lepe){5JhWP8b?~r`{`F&f@^W>Y8 zWni;^J!_A8wmEq;^Z$u@2Vd9M({}8#&-WQXVpj>zm@^t6L+FE zE*Gh{mptFEew%PK_w7^vdVln<13b;aNuYmw``sC+Lo0pG<#?{v{p`2(zB0~dE@Ch0 zFE91?x2d|8?05M$$Nw+>(8Kdx?=0UttG%mPCtqfd=l6qmV%69+^Wwb+IEWLk-+AtA gp5Y}q^3B_PUd|VJSGMUojcc{jLc5-u|9>6+13=QtlmGw# literal 0 HcmV?d00001 diff --git a/src/test/resources/epub3/03-resources/files/xml-encoding-utf8-BOM-valid.opf b/src/test/resources/epub3/03-resources/files/xml-encoding-utf8-BOM-valid.opf new file mode 100644 index 000000000..cb79798bb --- /dev/null +++ b/src/test/resources/epub3/03-resources/files/xml-encoding-utf8-BOM-valid.opf @@ -0,0 +1,16 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/epub3/03-resources/files/xml-encoding-utf8-declared-valid.opf b/src/test/resources/epub3/03-resources/files/xml-encoding-utf8-declared-valid.opf new file mode 100644 index 000000000..0d1eec6e9 --- /dev/null +++ b/src/test/resources/epub3/03-resources/files/xml-encoding-utf8-declared-valid.opf @@ -0,0 +1,16 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/epub3/03-resources/files/xml-encoding-utf8-no-declaration-valid.opf b/src/test/resources/epub3/03-resources/files/xml-encoding-utf8-no-declaration-valid.opf new file mode 100644 index 000000000..ac6d675ce --- /dev/null +++ b/src/test/resources/epub3/03-resources/files/xml-encoding-utf8-no-declaration-valid.opf @@ -0,0 +1,15 @@ + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + 
+ + + + \ No newline at end of file diff --git a/src/test/resources/epub3/03-resources/resources.feature b/src/test/resources/epub3/03-resources/resources.feature index 9ce586246..501f065a2 100644 --- a/src/test/resources/epub3/03-resources/resources.feature +++ b/src/test/resources/epub3/03-resources/resources.feature @@ -388,18 +388,72 @@ ## 3.9 XML conformance - @spec @xref:sec-xml-constraint + @spec @xref:sec-xml-constraints + Scenario: an XML document encoded as UTF-8 with an encoding declaration is valid + When checking file 'xml-encoding-utf8-declared-valid.opf' + Then no errors or warnings are reported + + @spec @xref:sec-xml-constraints + Scenario: an XML document encoded as UTF-8 with a BOM is valid + When checking file 'xml-encoding-utf8-BOM-valid.opf' + Then no errors or warnings are reported + + @spec @xref:sec-xml-constraints + Scenario: an XML document encoded as UTF-8 with no encoding declaration is valid + When checking file 'xml-encoding-utf8-no-declaration-valid.opf' + Then no errors or warnings are reported + + @spec @xref:sec-xml-constraints + Scenario: Warn about an XML document encoded as UTF-16 (with an encoding declaration) + When checking file 'xml-encoding-utf16-declared-warning.opf' + Then warning RSC-027 is reported + And no other errors or warnings are reported + + @spec @xref:sec-xml-constraints + Scenario: Warn about an XML document encoded as UTF-16 (not declared but with a BOM) + When checking file 'xml-encoding-utf16-BOM-no-declaration-warning.opf' + Then warning RSC-027 is reported + And no other errors or warnings are reported + + @spec @xref:sec-xml-constraints + Scenario: Warn about an XML document encoded as UTF-16 (even with an UTF-8 declaration) + When checking file 'xml-encoding-utf16-BOM-and-utf8-declaration-warning.opf' + Then warning RSC-027 is reported + And fatal error RSC-016 is reported (by the XML parser) + And no other errors or warnings are reported + + @spec @xref:sec-xml-constraints + Scenario: Report an XML 
document encoded as ISO-8859-1 (detected in the encoding declaration) + When checking file 'xml-encoding-latin1-declaration-error.opf' + Then error RSC-028 is reported + And no other errors or warnings are reported + + @spec @xref:sec-xml-constraints + Scenario: Report an XML document encoded as UCS-4 (detected with a BOM) + When checking file 'xml-encoding-utf32-BOM-error.opf' + Then error RSC-028 is reported + And no other errors or warnings are reported + + @spec @xref:sec-xml-constraints + Scenario: Report an XML document declared with an unknown encoding name + When checking file 'xml-encoding-unknown-declared-error.opf' + Then error RSC-028 is reported + And fatal error RSC-016 is reported (by the XML parser) + And no other errors or warnings are reported + + @spec @xref:sec-xml-constraints Scenario: a not well-formed Package Document is reported When checking file 'conformance-xml-malformed-error.opf' Then fatal error RSC-016 is reported (parsing error) And no other errors or warnings are reported - @spec @xref:sec-xml-constraint + @spec @xref:sec-xml-constraints Scenario: using a not-declared namespace is not allowed When checking file 'conformance-xml-undeclared-namespace-error.opf' Then fatal error RSC-016 is reported (parsing error) And no other errors or warnings are reported + Scenario: Verify an attribute value with leading/trailing whitespace is allowed (issue 332) When checking EPUB 'conformance-xml-id-leading-trailing-spaces-valid' Then no errors or warnings are reported diff --git a/src/test/resources/epub3/06-content-document/content-document-css.feature b/src/test/resources/epub3/06-content-document/content-document-css.feature index 008b63b71..2fb748009 100644 --- a/src/test/resources/epub3/06-content-document/content-document-css.feature +++ b/src/test/resources/epub3/06-content-document/content-document-css.feature @@ -38,15 +38,26 @@ Feature: EPUB 3 — Content Documents — CSS ##### Encoding @spec @xref:sec-css-req - Scenario: Verify a CSS 
file with a `@charset` declaration and UTF8 encoding - See also issue #262 - When checking EPUB 'content-css-charset-utf8-valid' + Scenario: Verify a CSS document encoded in UTF-8 (declared with `@charset`) + When checking EPUB 'content-css-encoding-utf8-declared-valid' Then no errors or warnings are reported @spec @xref:sec-css-req - Scenario: Report a CSS file with a `@charset` declaration that is not utf-8 - When checking EPUB 'content-css-charset-enc-error' - Then error CSS-003 is reported + Scenario: Warn about a CSS document encoded in UTF-16 (declared with `@charset`) + When checking EPUB 'content-css-encoding-utf16-declared-warning' + Then warning CSS-003 is reported + And no other errors or warnings are reported + + @spec @xref:sec-css-req + Scenario: Warn about a CSS document encoded in UTF-16 (and no `@charset` declaration) + When checking EPUB 'content-css-encoding-utf16-not-declared-warning' + Then warning CSS-003 is reported + And no other errors or warnings are reported + + @spec @xref:sec-css-req + Scenario: Report a CSS document with a `@charset` declaration that is not utf-8 or utf-16 + When checking EPUB 'content-css-encoding-latin1-error' + Then error CSS-004 is reported And no other errors or warnings are reported ##### Resources and imports diff --git a/src/test/resources/epub3/06-content-document/content-document-xhtml.feature b/src/test/resources/epub3/06-content-document/content-document-xhtml.feature index e81512ece..2497c960a 100644 --- a/src/test/resources/epub3/06-content-document/content-document-xhtml.feature +++ b/src/test/resources/epub3/06-content-document/content-document-xhtml.feature @@ -37,6 +37,13 @@ Feature: EPUB 3 — Content Documents — XHTML #### Document Properties - HTML Conformance + + #### Encoding + + Scenario: Report an XHTML document not encoded as UTF-8 + When checking document 'encoding-utf16-error.xhtml' + Then error HTM-058 is reported + And no other errors or warnings are reported #### Document Title diff --git 
a/src/test/resources/epub3/06-content-document/files/content-css-charset-enc-error/EPUB/content_001.xhtml b/src/test/resources/epub3/06-content-document/files/content-css-encoding-latin1-error/EPUB/content_001.xhtml similarity index 100% rename from src/test/resources/epub3/06-content-document/files/content-css-charset-enc-error/EPUB/content_001.xhtml rename to src/test/resources/epub3/06-content-document/files/content-css-encoding-latin1-error/EPUB/content_001.xhtml diff --git a/src/test/resources/epub3/06-content-document/files/content-css-charset-enc-error/EPUB/nav.xhtml b/src/test/resources/epub3/06-content-document/files/content-css-encoding-latin1-error/EPUB/nav.xhtml similarity index 100% rename from src/test/resources/epub3/06-content-document/files/content-css-charset-enc-error/EPUB/nav.xhtml rename to src/test/resources/epub3/06-content-document/files/content-css-encoding-latin1-error/EPUB/nav.xhtml diff --git a/src/test/resources/epub3/06-content-document/files/content-css-charset-enc-error/EPUB/package.opf b/src/test/resources/epub3/06-content-document/files/content-css-encoding-latin1-error/EPUB/package.opf similarity index 100% rename from src/test/resources/epub3/06-content-document/files/content-css-charset-enc-error/EPUB/package.opf rename to src/test/resources/epub3/06-content-document/files/content-css-encoding-latin1-error/EPUB/package.opf diff --git a/src/test/resources/epub3/06-content-document/files/content-css-charset-enc-error/EPUB/style.css b/src/test/resources/epub3/06-content-document/files/content-css-encoding-latin1-error/EPUB/style.css similarity index 84% rename from src/test/resources/epub3/06-content-document/files/content-css-charset-enc-error/EPUB/style.css rename to src/test/resources/epub3/06-content-document/files/content-css-encoding-latin1-error/EPUB/style.css index e5cf72eb9..02c79f916 100644 --- a/src/test/resources/epub3/06-content-document/files/content-css-charset-enc-error/EPUB/style.css +++ 
b/src/test/resources/epub3/06-content-document/files/content-css-encoding-latin1-error/EPUB/style.css @@ -1,4 +1,4 @@ -@charset "ISO-8859-7"; +@charset "ISO-8859-1"; body { margin-left : 6em; diff --git a/src/test/resources/epub3/06-content-document/files/content-css-charset-enc-error/META-INF/container.xml b/src/test/resources/epub3/06-content-document/files/content-css-encoding-latin1-error/META-INF/container.xml similarity index 100% rename from src/test/resources/epub3/06-content-document/files/content-css-charset-enc-error/META-INF/container.xml rename to src/test/resources/epub3/06-content-document/files/content-css-encoding-latin1-error/META-INF/container.xml diff --git a/src/test/resources/epub3/06-content-document/files/content-css-charset-enc-error/mimetype b/src/test/resources/epub3/06-content-document/files/content-css-encoding-latin1-error/mimetype similarity index 100% rename from src/test/resources/epub3/06-content-document/files/content-css-charset-enc-error/mimetype rename to src/test/resources/epub3/06-content-document/files/content-css-encoding-latin1-error/mimetype diff --git a/src/test/resources/epub3/06-content-document/files/content-css-charset-utf8-valid/EPUB/content_001.xhtml b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-declared-warning/EPUB/content_001.xhtml similarity index 100% rename from src/test/resources/epub3/06-content-document/files/content-css-charset-utf8-valid/EPUB/content_001.xhtml rename to src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-declared-warning/EPUB/content_001.xhtml diff --git a/src/test/resources/epub3/06-content-document/files/content-css-charset-utf8-valid/EPUB/nav.xhtml b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-declared-warning/EPUB/nav.xhtml similarity index 100% rename from src/test/resources/epub3/06-content-document/files/content-css-charset-utf8-valid/EPUB/nav.xhtml rename to 
src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-declared-warning/EPUB/nav.xhtml diff --git a/src/test/resources/epub3/06-content-document/files/content-css-charset-utf8-valid/EPUB/package.opf b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-declared-warning/EPUB/package.opf similarity index 100% rename from src/test/resources/epub3/06-content-document/files/content-css-charset-utf8-valid/EPUB/package.opf rename to src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-declared-warning/EPUB/package.opf diff --git a/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-declared-warning/EPUB/style.css b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-declared-warning/EPUB/style.css new file mode 100644 index 0000000000000000000000000000000000000000..59531fdabcc877a6789e91b93064751c3d101366 GIT binary patch literal 108 zcmXYpK?*=n07T!Gog2uW*jTWU;shK(`XZvRldEHXX=)ZT^X7Re=o#tQIWU~~1hh2N oge0VtR7!GODPmz_WhN6bXl=y#T+d2O{VZVE_OC literal 0 HcmV?d00001 diff --git a/src/test/resources/epub3/06-content-document/files/content-css-charset-utf8-valid/META-INF/container.xml b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-declared-warning/META-INF/container.xml similarity index 100% rename from src/test/resources/epub3/06-content-document/files/content-css-charset-utf8-valid/META-INF/container.xml rename to src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-declared-warning/META-INF/container.xml diff --git a/src/test/resources/epub3/06-content-document/files/content-css-charset-utf8-valid/mimetype b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-declared-warning/mimetype similarity index 100% rename from src/test/resources/epub3/06-content-document/files/content-css-charset-utf8-valid/mimetype rename to 
src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-declared-warning/mimetype diff --git a/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/content_001.xhtml b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/content_001.xhtml new file mode 100644 index 000000000..bf2bf7f4d --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/content_001.xhtml @@ -0,0 +1,12 @@ + + + + + Minimal EPUB + + + +

Loomings

+

Call me Ishmael.

+ + diff --git a/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/nav.xhtml b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/nav.xhtml new file mode 100644 index 000000000..240745e63 --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/nav.xhtml @@ -0,0 +1,14 @@ + + + + + Minimal Nav + + + + + diff --git a/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/package.opf b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/package.opf new file mode 100644 index 000000000..3fcc3adbd --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/package.opf @@ -0,0 +1,17 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/style.css b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/EPUB/style.css new file mode 100644 index 0000000000000000000000000000000000000000..d4f6b4d9c6c8ae13a75d82895f99f79ca82d5628 GIT binary patch literal 64 zcmezOpFxiyhar<8k0G1EiXo99jiH1gm7$11fuWj#i-D6NnIRu22NDC3RzPtj13?BQ L25X>fEdv(-!dwe_ literal 0 HcmV?d00001 diff --git a/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/META-INF/container.xml b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/META-INF/container.xml new file mode 100644 index 000000000..2ca12eff7 --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/META-INF/container.xml @@ -0,0 +1,6 @@ + + + + + + 
diff --git a/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/mimetype b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/mimetype new file mode 100644 index 000000000..57ef03f24 --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf16-not-declared-warning/mimetype @@ -0,0 +1 @@ +application/epub+zip \ No newline at end of file diff --git a/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/content_001.xhtml b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/content_001.xhtml new file mode 100644 index 000000000..bf2bf7f4d --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/content_001.xhtml @@ -0,0 +1,12 @@ + + + + + Minimal EPUB + + + +

Loomings

+

Call me Ishmael.

+ + diff --git a/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/nav.xhtml b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/nav.xhtml new file mode 100644 index 000000000..240745e63 --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/nav.xhtml @@ -0,0 +1,14 @@ + + + + + Minimal Nav + + + + + diff --git a/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/package.opf b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/package.opf new file mode 100644 index 000000000..3fcc3adbd --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/package.opf @@ -0,0 +1,17 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/epub3/06-content-document/files/content-css-charset-utf8-valid/EPUB/style.css b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/style.css similarity index 100% rename from src/test/resources/epub3/06-content-document/files/content-css-charset-utf8-valid/EPUB/style.css rename to src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/EPUB/style.css diff --git a/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/META-INF/container.xml b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/META-INF/container.xml new file mode 100644 index 000000000..2ca12eff7 --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/META-INF/container.xml @@ -0,0 +1,6 @@ + + + + + + diff --git 
a/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/mimetype b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/mimetype new file mode 100644 index 000000000..57ef03f24 --- /dev/null +++ b/src/test/resources/epub3/06-content-document/files/content-css-encoding-utf8-declared-valid/mimetype @@ -0,0 +1 @@ +application/epub+zip \ No newline at end of file diff --git a/src/test/resources/epub3/06-content-document/files/encoding-utf16-error.xhtml b/src/test/resources/epub3/06-content-document/files/encoding-utf16-error.xhtml new file mode 100644 index 0000000000000000000000000000000000000000..a02495f6401c34901003e52b35865e34712f6efb GIT binary patch literal 408 zcmZXQOAo<76otPnzhc-k5s8G94c07dEImpkUSR}(9%pV-smM*6>AClQ=XBn$gc>6z z44E-!${0sR;J}^%Rg&+Q<6-KND}Cyw2hQ~Hs=HNF?>0?NT-Zu&(Y5m0EvR)lrbrqpPtoO%cC2X!4QIW6RWH6@a7>TEREq9b~0E?yL8 y+c`b*13Nu&VxZedMF>*;kDe#-@+fCXSn8kGJWJU!`TcTor680yqDyf