Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add better url checks #731

Merged
merged 3 commits into from
Feb 2, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ static Map<MessageId, Severity> getDefaultSeverities()
map.put(MessageId.RSC_020, Severity.ERROR);
map.put(MessageId.RSC_021, Severity.ERROR);
map.put(MessageId.RSC_022, Severity.INFO);
map.put(MessageId.RSC_023, Severity.WARNING);

// Scripting
map.put(MessageId.SCP_001, Severity.USAGE);
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/adobe/epubcheck/messages/MessageId.java
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ public enum MessageId implements Comparable<MessageId>
RSC_020("RSC-020"),
RSC_021("RSC-021"),
RSC_022("RSC-022"),
RSC_023("RSC-023"),

// Messages relating to scripting
SCP_001("SCP-001"),
Expand Down
13 changes: 12 additions & 1 deletion src/main/java/com/adobe/epubcheck/ops/OPSHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,20 @@ else if (".".equals(href))
URI uri = checkURI(href);
if (uri == null) return;

if ("http".equals(uri.getScheme()))
if ("http".equals(uri.getScheme()) || "https".equals(uri.getScheme()))
{
report.info(path, FeatureEnum.REFERENCE, href);

/*
* #708 report invalid HTTP/HTTPS URLs
* The scheme itself may be correct while one or both slashes of the "//"
* that should follow "scheme:" are missing; in that case no host can be
* parsed and uri.getHost() == null
*/
if (uri.getHost() == null)
{
int missingSlashes = uri.getSchemeSpecificPart().startsWith("/") ? 1 : 2;
report.message(MessageId.RSC_023, parser.getLocation(), uri, missingSlashes, uri.getScheme());
}
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@ RSC_019=EPUBs with Multiple Renditions should contain a META-INF/metadata.xml fi
RSC_020='%1$s' is not a valid URI.
RSC_021=A Search Key Map Document must point to Content Documents ('%1$s' was not found in the spine).
RSC_022=Cannot check image details (requires Java version 7 or higher).
RSC_023=The URL '%1$s' is missing %2$d slash(es) '/' after the protocol '%3$s:'

#Scripting
SCP_001=Use of Javascript eval() function in EPUB scripts is a security risk.
Expand Down
9 changes: 9 additions & 0 deletions src/test/java/com/adobe/epubcheck/ops/OPSCheckerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,15 @@ public void testValidateXHTMLLINKInvalid()
EPUBVersion.VERSION_3);
}

/**
 * Validates the issue #708 URL-check fixture as an EPUB 3 XHTML document.
 * Expects one RSC_020 error, one HTM_025 warning and two RSC_023 warnings.
 */
@Test
public void testValidateXHTMLUrlChecksInvalid()
{
  // Single expected error.
  expectedErrors.add(MessageId.RSC_020);

  // Expected warnings, registered in the same order as before;
  // RSC_023 is expected twice.
  expectedWarnings.add(MessageId.HTM_025);
  expectedWarnings.add(MessageId.RSC_023);
  expectedWarnings.add(MessageId.RSC_023);

  final String fixture = "xhtml/invalid/url-checks_issue-708.xhtml";
  final String mediaType = "application/xhtml+xml";
  testValidateDocument(fixture, mediaType, EPUBVersion.VERSION_3);
}

@Test
public void testValidateXHTMLXml11()
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8"?>
<?oxygen RNGSchema="../../../src/schema/epub-xhtml-30.rnc" type="compact"?>
<?oxygen SCHSchema="../../../src/schema/epub-xhtml-30.sch"?>
<!--
Test fixture for URL checks: the first four links carry intentionally
malformed href values, and each link's text names the message it is meant
to trigger. The remaining links are labelled as valid URIs. Do not "fix"
the malformed href values.
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head/>
<body>
<p>
<a href="https://www.youtube .com/watch?v=xxxxxxxxxxx">Invalid URI (RSC-020)</a>
<a href="httpf://www.youtube.com/watch?v=xxxxxxxxxxx">Unsupported URI scheme (HTM-025)</a>
<a href="https:/www.youtube.com/watch?v=xxxxxxxxxxx">URL is missing slashes after protocol (RSC-023)</a>
<a href="https:www.youtube.com/watch?v=xxxxxxxxxxx">URL is missing slashes after protocol (RSC-023)</a>

<a href="https://www.youtube.com/watch?v=xxxxxxxxxxx">Valid URI</a>
<a href="https://youtube.com/watch?v=xxxxxxxxxxx">Valid URI</a>
<a href="https://youtube.com/watch?v=xxxxxx%20xxxx">Valid URI</a>
</p>
</body>
</html>