Skip to content

Commit

Permalink
fix: do not check entity references in comment or CDATA (#949)
Browse files Browse the repository at this point in the history
- suppress poorly implemented checks `HTM-023` and `HTM-024`
  (entity references are already checked at parsing time)
- add more tests for entity references

Fixes #800
  • Loading branch information
rdeltour authored Jan 21, 2019
1 parent 2a244e7 commit 4307542
Show file tree
Hide file tree
Showing 14 changed files with 535 additions and 97 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ private void initialize()
severities.put(MessageId.HTM_020, Severity.USAGE);
severities.put(MessageId.HTM_021, Severity.USAGE);
severities.put(MessageId.HTM_022, Severity.USAGE);
severities.put(MessageId.HTM_023, Severity.WARNING);
severities.put(MessageId.HTM_024, Severity.USAGE);
severities.put(MessageId.HTM_023, Severity.SUPPRESSED);
severities.put(MessageId.HTM_024, Severity.SUPPRESSED);
severities.put(MessageId.HTM_025, Severity.WARNING);
severities.put(MessageId.HTM_027, Severity.USAGE);
severities.put(MessageId.HTM_028, Severity.USAGE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1126,5 +1126,10 @@ public void testInvalidCssFontSizeValue()
testValidateDocument("invalid/invalid-css-font-size-value");
}

@Test
public void testEntities()
{
  // Text that looks like an entity reference but sits inside a comment or a
  // CDATA section must not be parsed as one.
  final String publication = "valid/entities-in-comment-or-cdata";
  testValidateDocument(publication);
}

}
33 changes: 33 additions & 0 deletions src/test/java/com/adobe/epubcheck/ops/OPSCheckerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -944,4 +944,37 @@ public void testValidateEmptyLangAttribute_EPUB3_Valid()
EPUBVersion.VERSION_3);
}

@Test
public void testEntitiesValid()
{
  // Known named character references must be accepted; 'entity references'
  // appearing inside comments or CDATA sections must be ignored.
  final String document = "xhtml/valid/entities.xhtml";
  final String mediaType = "application/xhtml+xml";
  testValidateDocument(document, mediaType, EPUBVersion.VERSION_3);
}

@Test
public void testEntitiesInternalDeclaration()
{
  // Entities declared by the document itself (internal declarations) are allowed.
  final String document = "xhtml/valid/entities-internal.xhtml";
  final String mediaType = "application/xhtml+xml";
  testValidateDocument(document, mediaType, EPUBVersion.VERSION_3);
}

@Test
public void testEntitiesMissingSemicolon()
{
  // An entity reference that lacks its terminating semicolon must cause a
  // parsing error: one fatal (RSC-016) and one error (RSC-005) are expected.
  expectedFatals.add(MessageId.RSC_016);
  expectedErrors.add(MessageId.RSC_005);

  final String document = "xhtml/invalid/entities-missing-semicolon.xhtml";
  final String mediaType = "application/xhtml+xml";
  testValidateDocument(document, mediaType, EPUBVersion.VERSION_3);
}

@Test
public void testEntitiesUnknown()
{
  // A reference to an unknown entity name must be reported:
  // one fatal (RSC-016) and one error (RSC-005) are expected.
  expectedFatals.add(MessageId.RSC_016);
  expectedErrors.add(MessageId.RSC_005);

  final String document = "xhtml/invalid/entities-unknown.xhtml";
  final String mediaType = "application/xhtml+xml";
  testValidateDocument(document, mediaType, EPUBVersion.VERSION_3);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8" ?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="OPS/package.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="en" lang="en">
<head>
<title>Minimal EPUB</title>
</head>
<body epub:type="bodymatter">
<h1>Loomings</h1>
<p>Call me Ishmael.</p>
<!-- this is a valid entity reference -->
&amp;
<!-- this is a valid character reference -->
&#38;
<!-- this is not an entity reference, as it’s in a comment -->
<!-- &foo; -->
<!-- this is not an entity reference, as it’s in a CDATA section -->
<![CDATA[&foo;]]>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="en" lang="en">
<head>
<title>Minimal Nav</title>
</head>
<body>
<nav epub:type="toc">
<ol>
<li><a href="content_001.xhtml">content 001</a></li>
</ol>
</nav>
<nav epub:type="landmarks">
<ol>
<li><a href="content_001.xhtml" epub:type="bodymatter">Start Reading</a></li>
</ol>
</nav>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" xml:lang="en" unique-identifier="q">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:title id="title">Minimal EPUB 3.0</dc:title>
<dc:language>en</dc:language>
<dc:identifier id="q">NOID</dc:identifier>
<meta property="dcterms:modified">2017-06-14T00:00:01Z</meta>
</metadata>
<manifest>
<item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
<item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
</manifest>
<spine>
<itemref idref="content_001" />
</spine>
</package>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
application/epub+zip
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<title>Test</title>
</head>
<body>
<!-- this entity reference is missing the final semicolon -->
&amp
</body>
</html>
10 changes: 10 additions & 0 deletions src/test/resources/30/single/xhtml/invalid/entities-unknown.xhtml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<title>Test</title>
</head>
<body>
<!-- this entity reference uses an unknown entity name -->
&foo;
</body>
</html>
13 changes: 13 additions & 0 deletions src/test/resources/30/single/xhtml/valid/entities-internal.xhtml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version="1.0" standalone="yes" ?>
<!DOCTYPE html [
<!ENTITY foo "foo">
]>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<title>Test</title>
</head>
<body>
<!-- this is a valid reference to an entity declared in the internal DTD subset above -->
&foo;
</body>
</html>
16 changes: 16 additions & 0 deletions src/test/resources/30/single/xhtml/valid/entities.xhtml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Test</title>
</head>
<body>
<!-- this is a valid entity reference -->
&amp;
<!-- this is a valid character reference -->
&#38;
<!-- this is not an entity reference, as it’s in a comment -->
<!-- &foo; -->
<!-- this is not an entity reference, as it’s in a CDATA section -->
<![CDATA[&foo;]]>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<jhove xmlns="http://hul.harvard.edu/ois/xml/ns/jhove"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
date="2017-01-02"
name="epubcheck"
release="4.0.3-SNAPSHOT"
xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/jhove http://hul.harvard.edu/ois/xml/xsd/jhove/jhove.xsd">
<date>2017-01-02T18:40:31+01:00</date>
<repInfo uri="./com/adobe/epubcheck/test/encryption/epub20_encryption_binary_content.epub">
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
date="2019-01-20"
name="epubcheck"
release="4.1.1-SNAPSHOT"
xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/jhove http://hul.harvard.edu/ois/xml/xsd/jhove/jhove.xsd">
<date>2019-01-21T09:52:50+01:00</date>
<repInfo uri="./test-classes/com/adobe/epubcheck/test/encryption/epub20_encryption_binary_content.epub">
<created>2015-06-02T16:34:06Z</created>
<format>application/epub+zip</format>
<version>2.0.1</version>
Expand All @@ -18,8 +18,6 @@
<message severity="error" subMessage="RSC-012">RSC-012, ERROR, [Fragment identifier is not defined.], OEBPS/toc.ncx (36-81)</message>
<message severity="error" subMessage="RSC-012">RSC-012, ERROR, [Fragment identifier is not defined.], OEBPS/toc.ncx (42-75)</message>
<message severity="error" subMessage="RSC-012">RSC-012, ERROR, [Fragment identifier is not defined.], OEBPS/toc.ncx (48-61)</message>
<message severity="error" subMessage="HTM-023">HTM-023, WARN, [An invalid XHTML Named Entity was found: '&amp;0;'.], OEBPS/Text/pdfMigration.html (18-197)</message>
<message severity="error" subMessage="HTM-023">HTM-023, WARN, [An invalid XHTML Named Entity was found: '&amp;l0xb'.], OEBPS/Text/pdfMigration.html (291-6)</message>
</messages>
<mimeType>application/epub+zip</mimeType>
<properties>
Expand Down Expand Up @@ -77,13 +75,13 @@
</values>
</property>
<property>
<name>MediaTypes</name>
<values arity="Array" type="String">
<value>application/x-dtbncx+xml</value>
<value>application/xhtml+xml</value>
<value>image/png</value>
<value>image/jpeg</value>
</values>
<name>MediaTypes</name>
<values arity="Array" type="String">
<value>application/x-dtbncx+xml</value>
<value>application/xhtml+xml</value>
<value>image/png</value>
<value>image/jpeg</value>
</values>
</property>
<property>
<name>hasEncryption</name>
Expand Down
Loading

0 comments on commit 4307542

Please sign in to comment.