Skip to content

Commit

Permalink
Check if a file contains a file link.
Browse files Browse the repository at this point in the history
  • Loading branch information
kalaspuffar authored and tofi86 committed Nov 27, 2017
1 parent 5e33645 commit 8f7a2b7
Show file tree
Hide file tree
Showing 11 changed files with 185 additions and 33 deletions.
32 changes: 2 additions & 30 deletions src/main/java/com/adobe/epubcheck/ctc/EntitySearch.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,8 @@
import com.adobe.epubcheck.ocf.EncryptionFilter;
import com.adobe.epubcheck.util.EPUBVersion;

class EntitySearch
class EntitySearch extends TextSearch
{
private final ZipFile zip;
private final Hashtable<String, EncryptionFilter> enc;
private final Report report;
private final EPUBVersion version;

static final Pattern entityPattern = Pattern.compile("&([A-Za-z0-9]+)([;|\\s])");
static final HashSet<String> legalEntities2_0;
static final HashSet<String> legalEntities3_0;
Expand Down Expand Up @@ -71,32 +66,9 @@ class EntitySearch

public EntitySearch(EPUBVersion version, ZipFile zip, Report report)
{
this.zip = zip;
this.enc = new Hashtable<String, EncryptionFilter>();
this.report = report;
this.version = version;
super(version, zip, report);
}

InputStream getInputStream(String name) throws
IOException
{
ZipEntry entry = zip.getEntry(name);
if (entry == null)
{
return null;
}
InputStream in = zip.getInputStream(entry);
EncryptionFilter filter = enc.get(name);
if (filter == null)
{
return in;
}
if (filter.canDecrypt())
{
return filter.decrypt(in);
}
return null;
}

public Vector<String> Search(String entry)
{
Expand Down
14 changes: 11 additions & 3 deletions src/main/java/com/adobe/epubcheck/ctc/EpubTextContentCheck.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.adobe.epubcheck.ctc;

import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;

import com.adobe.epubcheck.api.Report;
Expand All @@ -13,12 +15,14 @@ public class EpubTextContentCheck implements DocumentValidator
{
private final Report report;
private final EpubPackage epack;
private final EntitySearch search;
private final List<TextSearch> search;

public EpubTextContentCheck(Report report, EpubPackage epack)
{
this.epack = epack;
this.search = new EntitySearch(epack.getVersion(), epack.getZip(), report);
this.search = new ArrayList<TextSearch>();
this.search.add(new EntitySearch(epack.getVersion(), epack.getZip(), report));
this.search.add(new FileLinkSearch(epack.getVersion(), epack.getZip(), report));
this.report = report;
}

Expand All @@ -40,7 +44,11 @@ public boolean validate()
// report.message(MessageId.RSC_001, EPUBLocation.create(this.epack.getFileName()), fileToParse);
continue;
}
this.search.Search(fileToParse);

for(TextSearch ts : this.search)
{
ts.Search(fileToParse);
}
}
}
return true;
Expand Down
82 changes: 82 additions & 0 deletions src/main/java/com/adobe/epubcheck/ctc/FileLinkSearch.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package com.adobe.epubcheck.ctc;

import com.adobe.epubcheck.api.EPUBLocation;
import com.adobe.epubcheck.api.Report;
import com.adobe.epubcheck.messages.MessageId;
import com.adobe.epubcheck.util.EPUBVersion;

import java.io.*;
import java.util.Scanner;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipFile;

/**
 * Text search that reads one resource of the publication line by line and reports every
 * {@code href} attribute whose value starts with {@code file://} as {@code HTM_053}.
 */
public class FileLinkSearch extends TextSearch {
  /** Matches the start of an href attribute (single or double quoted) pointing at a file:// URL. */
  private static final Pattern fileLinkPattern = Pattern.compile("href=[\"']file://");

  /** Number of characters of the line shown before the match in the reported context. */
  private static final int CONTEXT_LEAD = 20;

  /** Maximum length of the reported context snippet. */
  private static final int CONTEXT_LENGTH = 40;

  public FileLinkSearch(EPUBVersion version, ZipFile zip, Report report)
  {
    super(version, zip, report);
  }

  /**
   * Scans the given entry for file:// links and reports each hit with its line number, the
   * match offset within the line, and a short snippet of surrounding text.
   *
   * @param entry name of the entry inside the zip to scan
   * @return always an empty vector; findings are delivered through the report only
   */
  @Override
  Vector<String> Search(String entry)
  {
    String fileName = new File(zip.getName()).getName();
    InputStream is = null;
    BufferedReader br = null;
    try
    {
      is = getInputStream(entry);
      if (is == null)
      {
        // getInputStream yields null when the entry is absent or cannot be decrypted;
        // there is nothing to scan in that case.
        return new Vector<String>();
      }
      // Decode explicitly rather than with the platform default charset so the reported
      // context is identical on every machine. NOTE(review): assumes UTF-8 content;
      // UTF-16 encoded documents are not handled here.
      br = new BufferedReader(new InputStreamReader(is, "UTF-8"));

      int lineCounter = 1;
      String line;
      while ((line = br.readLine()) != null)
      {
        Matcher matcher = fileLinkPattern.matcher(line);
        while (matcher.find())
        {
          int contextStart = Math.max(0, matcher.start() - CONTEXT_LEAD);
          // substring's end index is exclusive, so the upper bound is the full line length.
          int contextEnd = Math.min(contextStart + CONTEXT_LENGTH, line.length());
          String context = line.substring(contextStart, contextEnd).trim();

          report.message(MessageId.HTM_053, EPUBLocation.create(entry, lineCounter, matcher.start(), context), context);
        }
        lineCounter++;
      }
    }
    catch (FileNotFoundException e1)
    {
      report.message(MessageId.RSC_001, EPUBLocation.create(fileName), entry);
    }
    catch (IOException e1)
    {
      report.message(MessageId.PKG_008, EPUBLocation.create(fileName), entry);
    }
    catch (Exception e)
    {
      // Kept from the original: the report only carries e.getMessage(), so the stack
      // trace is the sole place the failure location is visible.
      e.printStackTrace();
      report.message(MessageId.RSC_005, EPUBLocation.create(entry), e.getMessage());
    }
    finally
    {
      silentlyClose(br);
      silentlyClose(is);
    }
    return new Vector<String>();
  }

  /** Closes the resource if it was opened; close failures are deliberately ignored. */
  private void silentlyClose(Closeable c) {
    if (c == null)
    {
      // Never opened (e.g. the stream lookup failed before the reader was created).
      return;
    }
    try
    {
      c.close();
    }
    catch (IOException ignored)
    {
      // Best-effort cleanup: a failure to close must not mask the scan result.
    }
  }
}
50 changes: 50 additions & 0 deletions src/main/java/com/adobe/epubcheck/ctc/TextSearch.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package com.adobe.epubcheck.ctc;

import com.adobe.epubcheck.api.Report;
import com.adobe.epubcheck.ocf.EncryptionFilter;
import com.adobe.epubcheck.util.EPUBVersion;

import java.io.IOException;
import java.io.InputStream;
import java.util.Hashtable;
import java.util.Vector;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

/**
 * Base class for text-based checks over entries of a zip archive. It holds the archive,
 * the report and the EPUB version for subclasses, and provides access to an entry's
 * content stream.
 */
abstract class TextSearch {
  // Filters keyed by entry name. NOTE(review): nothing visible in this class ever adds
  // a filter, so the lookup in getInputStream currently always misses.
  private final Hashtable<String, EncryptionFilter> enc;
  final ZipFile zip;
  final Report report;
  final EPUBVersion version;


  public TextSearch(EPUBVersion version, ZipFile zip, Report report)
  {
    this.zip = zip;
    this.enc = new Hashtable<String, EncryptionFilter>();
    this.report = report;
    this.version = version;
  }

  /**
   * Opens the content of the named zip entry.
   *
   * @param name entry name inside the zip
   * @return the entry's stream (passed through the registered filter's {@code decrypt}
   *         when one exists and reports it can decrypt), or {@code null} when the entry
   *         does not exist or its filter cannot decrypt
   * @throws IOException if the zip entry cannot be opened
   */
  InputStream getInputStream(String name) throws IOException
  {
    ZipEntry entry = zip.getEntry(name);
    if (entry == null)
    {
      return null;
    }
    InputStream in = zip.getInputStream(entry);
    EncryptionFilter filter = enc.get(name);
    if (filter == null)
    {
      return in;
    }
    if (filter.canDecrypt())
    {
      return filter.decrypt(in);
    }
    // The caller only receives null here, so the stream opened above must be released
    // in this method or it is never closed.
    try
    {
      in.close();
    }
    catch (IOException ignored)
    {
      // Best-effort cleanup; the caller is told "no stream" either way.
    }
    return null;
  }

  /**
   * Runs this search over the named entry.
   *
   * @param entry name of the entry inside the zip to examine
   * @return implementation-defined list of results
   */
  abstract Vector<String> Search(String entry);
}
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ static Map<MessageId, Severity> getDefaultSeverities()
map.put(MessageId.HTM_050, Severity.USAGE);
map.put(MessageId.HTM_051, Severity.WARNING);
map.put(MessageId.HTM_052, Severity.ERROR);
map.put(MessageId.HTM_053, Severity.INFO);

// Media
map.put(MessageId.MED_001, Severity.ERROR);
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/adobe/epubcheck/messages/MessageId.java
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ public enum MessageId implements Comparable<MessageId>
HTM_050("HTM-050"),
HTM_051("HTM-051"),
HTM_052("HTM-052"),
HTM_053("HTM-053"), // external file link (file://) found

// Messages associated with media (images, audio and video)
MED_001("MED-001"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ HTM_049_SUG=Add xmlns="http://www.w3.org/1999/xhtml" to the html element.
HTM_050=Found epub:type="pagebreak" attribute in content document.
HTM_051=Found Microdata semantic enrichments but no RDFa. EDUPUB recommends using RDFa Lite.
HTM_052=The property 'region-based' is only allowed on nav elements in Data Navigation Documents.
HTM_053=Found an external file link (file://) in file: '%1$s'.

#media
MED_001=Video poster must have core media image type.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
package com.adobe.epubcheck.api;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.matchers.JUnitMatchers.hasItems;

import java.io.BufferedReader;
import java.io.File;
Expand Down Expand Up @@ -54,6 +56,8 @@ public abstract class AbstractEpubCheckTest
List<MessageId> expectedWarnings = new LinkedList<MessageId>();
List<MessageId> expectedErrors = new LinkedList<MessageId>();
List<MessageId> expectedFatals = new LinkedList<MessageId>();
List<MessageId> expectedInfos = new LinkedList<MessageId>();


protected AbstractEpubCheckTest(String basepath)
{
Expand Down Expand Up @@ -148,6 +152,9 @@ public void testValidateDocument(String fileName, String resultFile, EPUBProfile
IdsToListOfString(testReport.getWarningIds()));
assertEquals("The fatal error results do not match", IdsToListOfString(expectedFatals),
IdsToListOfString(testReport.getFatalErrorIds()));
assertThat("The info results do not match",
IdsToListOfString(testReport.getInfoIds()),
hasItems(IdsToListOfString(expectedInfos).toArray(new String[expectedInfos.size()])));

if (resultFile != null)
{
Expand Down
7 changes: 7 additions & 0 deletions src/test/java/com/adobe/epubcheck/api/Epub30CheckTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,13 @@ public void testValidateEPUB30Issue221()
testValidateDocument("invalid/issue221.epub");
}

/**
 * Validates the issue 289 sample and expects an HTM_053 info message among the results.
 */
@Test
public void testValidateEPUB30Issue289()
{
  // Register the single expected info ID before running the validation.
  expectedInfos.add(MessageId.HTM_053);
  testValidateDocument("valid/issue289.epub");
}

@Test
public void testValidateEPUB30FontObfuscation()
{
Expand Down
23 changes: 23 additions & 0 deletions src/test/java/com/adobe/epubcheck/util/ValidationReport.java
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ else if (message.getSeverity().equals(Severity.FATAL))
{
fatalError(PathUtil.removeWorkingDirectory(location.getPath()), location.getLine(), location.getColumn(), message.getMessage(args), message.getID());
}
else if (message.getSeverity().equals(Severity.INFO))
{
info(PathUtil.removeWorkingDirectory(location.getPath()), location.getLine(), location.getColumn(), message.getMessage(args), message.getID());
}
}

private void error(String resource, int line, int column, String message, MessageId id)
Expand All @@ -117,6 +121,12 @@ private void warning(String resource, int line, int column, String message, Mess
warningList.add(item);
}

/**
 * Records an info-level item in the info list.
 *
 * @param resource path of the resource the message refers to
 * @param line line number of the location
 * @param column column number of the location
 * @param message message text; passed through {@code fixMessage} before being stored
 * @param id message ID stored with the item
 */
public void info(String resource, int line, int column, String message, MessageId id)
{
  final String text = fixMessage(message);
  final ItemReport entry = new ItemReport(resource, line, column, text, id);
  getInfoList().add(entry);
}

public String toString()
{
StringBuilder buffer = new StringBuilder();
Expand Down Expand Up @@ -233,6 +243,18 @@ public int generate()
return 0;
}

/**
 * Collects the message IDs of the recorded info items, in list order.
 *
 * @return a new list with the ID of every info item that has one; items with a
 *         {@code null} ID are left out
 */
public List<MessageId> getInfoIds()
{
  final List<MessageId> ids = new ArrayList<MessageId>();
  for (ItemReport item : infoList)
  {
    if (item.id == null)
    {
      // Items without an ID contribute nothing to the result.
      continue;
    }
    ids.add(item.id);
  }
  return ids;
}

public List<MessageId> getWarningIds()
{
List<MessageId> result = new ArrayList<MessageId>();
Expand Down Expand Up @@ -262,4 +284,5 @@ public List<MessageId> getFatalErrorIds()
}
return result;
}

}
Binary file added src/test/resources/30/epub/valid/issue289.epub
Binary file not shown.

0 comments on commit 8f7a2b7

Please sign in to comment.