Skip to content

Commit

Permalink
#508 De-duplicate file embeds, logging, PDF/A3 requirements.
Browse files Browse the repository at this point in the history
  • Loading branch information
danfickle committed Feb 1, 2021
1 parent 1c0d240 commit 9d890e0
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ enum LogMessageId1Param implements LogMessageId {
LOAD_COULD_NOT_INSTANTIATE_CUSTOM_XML_READER(XRLog.LOAD, "Could not instantiate custom XMLReader class for XML parsing: {}. " +
"Please check classpath. Use value 'default' in FS configuration if necessary. Will now try JDK default."),
LOAD_UNABLE_TO_LOAD_CSS_FROM_URI(XRLog.LOAD, "Unable to load CSS from {}"),
LOAD_COULD_NOT_LOAD_EMBEDDED_FILE(XRLog.LOAD, "Was not able to load an embedded file for embedding with uri {}"),
LOAD_PARSE_STYLESHEETS_TIME(XRLog.LOAD, "TIME: parse stylesheets {}ms"),
LOAD_REQUESTING_STYLESHEET_AT_URI(XRLog.LOAD, "Requesting stylesheet: {}"),
LOAD_UNRECOGNIZED_IMAGE_FORMAT_FOR_URI(XRLog.LOAD, "Unrecognized image format for: {}"),
Expand Down Expand Up @@ -161,7 +162,8 @@ enum LogMessageId1Param implements LogMessageId {
EXCEPTION_COULD_NOT_LOAD_FONT_FACE(XRLog.EXCEPTION, "Could not load @font-face font: {}"),
EXCEPTION_COULD_NOT_LOAD_DEFAULT_CSS(XRLog.EXCEPTION, "Can't load default CSS from {}. This file must be on your CLASSPATH. Please check before continuing."),
EXCEPTION_DEFAULT_USERAGENT_IS_NOT_ABLE_TO_RESOLVE_BASE_URL_FOR(XRLog.EXCEPTION, "The default NaiveUserAgent doesn't know how to resolve the base URL for {}"),
EXCEPTION_FAILED_TO_LOAD_BACKGROUND_IMAGE_AT_URI(XRLog.EXCEPTION, "Failed to load background image at uri {}");
EXCEPTION_FAILED_TO_LOAD_BACKGROUND_IMAGE_AT_URI(XRLog.EXCEPTION, "Failed to load background image at uri {}"),
EXCEPTION_COULD_NOT_LOAD_EMBEDDED_FILE(XRLog.EXCEPTION, "Was not able to create an embedded file for embedding with uri {}");

private final String where;
private final String messageFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
import java.util.stream.IntStream;

import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
Expand All @@ -37,7 +40,6 @@
import org.apache.pdfbox.util.Charsets;
import org.hamcrest.CustomTypeSafeMatcher;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;

import com.openhtmltopdf.outputdevice.helper.ExternalResourceControlPriority;
Expand Down Expand Up @@ -1070,16 +1072,22 @@ public void testIssue508FileEmbed() throws IOException {
builder.useExternalResourceAccessControl((uri, type) -> true, ExternalResourceControlPriority.RUN_AFTER_RESOLVING_URI);
builder.useExternalResourceAccessControl((uri, type) -> true, ExternalResourceControlPriority.RUN_BEFORE_RESOLVING_URI);
})) {
// TODO: Re-enable this assertion when we have figured out a way
// to avoid duplicate file embeds when the link is broken
// up into boxes (eg. multiple lines).
// assertThat(doc.getPage(0).getAnnotations().size(), equalTo(1));

PDAnnotationFileAttachment fileAttach = (PDAnnotationFileAttachment) doc.getPage(0).getAnnotations().get(0);
assertThat(fileAttach.getFile().getFile(), equalTo("basic.css"));
// There should be multiple file attachment annotations because the link
// is broken into two boxes on multiple lines.
assertThat(doc.getPage(0).getAnnotations().size(), equalTo(2));

// TODO:
// More asserts.
PDAnnotationFileAttachment fileAttach1 = (PDAnnotationFileAttachment) doc.getPage(0).getAnnotations().get(0);
assertThat(fileAttach1.getFile().getFile(), equalTo("basic.css"));

PDAnnotationFileAttachment fileAttach2 = (PDAnnotationFileAttachment) doc.getPage(0).getAnnotations().get(1);
assertThat(fileAttach2.getFile().getFile(), equalTo("basic.css"));

try (COSDocument cosDoc = doc.getDocument()) {
// Make sure the file is only embedded once.
List<COSObject> files = cosDoc.getObjectsByType(COSName.FILESPEC);
assertThat(files.size(), equalTo(1));
}

remove("issue-508-file-embed", doc);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.verapdf.pdfa.results.TestAssertion;
import org.verapdf.pdfa.results.ValidationResult;

import com.openhtmltopdf.outputdevice.helper.ExternalResourceControlPriority;
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder;
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder.PdfAConformance;

Expand Down Expand Up @@ -65,7 +66,11 @@ public boolean run(String resource, PDFAFlavour flavour, PdfAConformance conform
builder.usePdfAConformance(conform);
builder.useFont(new File("target/test/artefacts/Karla-Bold.ttf"), "TestFont");
builder.withHtmlContent(html, PdfATester.class.getResource("/html/").toString());


// File embeds are blocked by default, allow everything.
builder.useExternalResourceAccessControl((uri, type) -> true, ExternalResourceControlPriority.RUN_AFTER_RESOLVING_URI);
builder.useExternalResourceAccessControl((uri, type) -> true, ExternalResourceControlPriority.RUN_BEFORE_RESOLVING_URI);

try (InputStream colorProfile = PdfATester.class.getResourceAsStream("/colorspaces/sRGB.icc")) {
byte[] colorProfileBytes = IOUtils.toByteArray(colorProfile);
builder.useColorProfile(colorProfileBytes);
Expand Down Expand Up @@ -128,5 +133,12 @@ public void testAllInOnePdfA2a() throws Exception {
public void testAllInOnePdfA2u() throws Exception {
    // Run the "all-in-one" resource against the PDF/A-2u flavour and expect success.
    boolean passed = run("all-in-one", PDFAFlavour.PDFA_2_U, PdfAConformance.PDFA_2_U);
    assertTrue(passed);
}


/**
 * Runs the "file-embed" resource against the PDF/A-3b flavour and
 * expects the run to succeed. File embedding is allowed as of PDF/A3.
 */
@Test
public void testFileEmbedA3b() throws Exception {
    boolean passed = run("file-embed", PDFAFlavour.PDFA_3_B, PdfAConformance.PDFA_3_B);
    assertTrue(passed);
}
}
36 changes: 36 additions & 0 deletions openhtmltopdf-pdfa-testing/src/test/resources/html/file-embed.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<html lang="EN-US">
<head>
<title>File embed testcase</title>
<meta name="subject" content="PDF/A3 file embed"/>
<meta name="author" content="openhtmltopdf.com team"/>
<meta name="description" content="An example for file embed"/>

<bookmarks>
<bookmark name="File embed" href="#file"/>
</bookmarks>

<style>
body {
margin: 0;
font-family: 'TestFont'; /* Font provided with builder. */
font-size: 15px;
}
</style>
</head>
<body>
<h1 id="file">File embed example</h1>

<!--
  File embed link. Attributes read when the link is turned into a file attachment:
    download          - file name given to the embedded file
    data-content-type - MIME type of the embedded file
    title             - description of the embedded file (recommended for PDF/A3)
    relationship      - relationship of the file to the link (required for PDF/A3),
                        one of Source, Supplement, Data, Alternative, Unspecified
  The href points at this very document, so the page embeds its own source.
-->
<p>
<a href="file-embed.html"
download="source.html"
data-content-type="text/html"
title="File embedded"
relationship="Source">

File embed

</a>
</p>

</body>
</html>
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import com.openhtmltopdf.util.LogMessageId;
import com.openhtmltopdf.util.XRLog;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
Expand Down Expand Up @@ -50,7 +52,19 @@ public class PdfBoxFastLinkManager {
private final Box _root;
private final PdfBoxFastOutputDevice _od;
private final List<LinkDetails> _links;
private PdfBoxAccessibilityHelper _pdfUa;
private PdfBoxAccessibilityHelper _pdfUa;

/**
* A map from uri to embedded file, so we don't embed files twice
* in case of a split link (example, two link boxes are formed when
* a link breaks in the middle).
*/
private final Map<String, PDComplexFileSpecification> _embeddedFiles;

/**
* The lazily created appearance dict for embedded files.
*/
private PDAppearanceDictionary _embeddedFileAppearance;

public PdfBoxFastLinkManager(SharedContext ctx, float dotsPerPoint, Box root, PdfBoxFastOutputDevice od) {
this._sharedContext = ctx;
Expand All @@ -59,6 +73,7 @@ public PdfBoxFastLinkManager(SharedContext ctx, float dotsPerPoint, Box root, Pd
this._od = od;
this._linkTargetAreas = new HashMap<>();
this._links = new ArrayList<>();
this._embeddedFiles = new HashMap<>();
}

private Rectangle2D calcTotalLinkArea(RenderingContext c, Box box, float pageHeight, AffineTransform transform) {
Expand Down Expand Up @@ -274,51 +289,112 @@ private void addUriAsLink(RenderingContext c, Box box, PDPage page, float pageHe
}
}

/**
 * Create a file attachment link annotation, being careful not to embed the
 * same file (as specified by uri) more than once.
 *
 * The element should have the following attributes:
 * download="embedded-filename.ext",
 * data-content-type="file-mime-type" which
 * defaults to "application/octet-stream",
 * relationship (required for PDF/A3), one of:
 * "Source", "Supplement", "Data", "Alternative", "Unspecified",
 * title="file description" (recommended for PDF/A3).
 *
 * @param elem the link element carrying the attributes listed above
 * @param uri the uri of the file to embed; also the key used to de-duplicate embeds
 * @return a container wrapping the new file attachment annotation, or
 *         null if the file could not be loaded or embedded (a warning is logged)
 */
private AnnotationContainer createFileEmbedLinkAnnotation(
        Element elem, String uri) {
    PDComplexFileSpecification fs = _embeddedFiles.get(uri);

    if (fs != null) {
        // Already embedded (for example the second box of a split link):
        // point a fresh annotation at the existing file specification
        // instead of embedding the bytes again.
        PDAnnotationFileAttachment annotationFileAttachment = new PDAnnotationFileAttachment();

        annotationFileAttachment.setFile(fs);
        annotationFileAttachment.setAppearance(this._embeddedFileAppearance);

        return new AnnotationContainer.PDAnnotationFileAttachmentContainer(annotationFileAttachment);
    }

    byte[] file = _sharedContext.getUserAgentCallback().getBinaryResource(uri, ExternalResourceType.FILE_EMBED);

    if (file != null) {
        try {
            String contentType = elem.getAttribute("data-content-type").isEmpty() ?
                    "application/octet-stream" :
                    elem.getAttribute("data-content-type");

            PDEmbeddedFile embeddedFile = new PDEmbeddedFile(_od.getWriter(), new ByteArrayInputStream(file));
            embeddedFile.setSubtype(contentType);
            embeddedFile.setSize(file.length);

            // PDF/A3 requires a mod date for the file.
            if (elem.hasAttribute("relationship")) {
                // FIXME: Should we make this specifiable?
                embeddedFile.setModDate(Calendar.getInstance());
            }

            String fileName = elem.getAttribute("download");

            fs = new PDComplexFileSpecification();
            fs.setEmbeddedFile(embeddedFile);
            fs.setFile(fileName);
            fs.setFileUnicode(fileName);

            // The PDF/A3 standard requires one to specify the relationship
            // this embedded file has to the link annotation.
            if (elem.hasAttribute("relationship") &&
                Arrays.asList("Source", "Supplement", "Data", "Alternative", "Unspecified")
                      .contains(elem.getAttribute("relationship"))) {
                fs.getCOSObject().setItem(
                        COSName.getPDFName("AFRelationship"),
                        COSName.getPDFName(elem.getAttribute("relationship")));
            }

            if (elem.hasAttribute("title")) {
                fs.setFileDescription(elem.getAttribute("title"));
            }

            // One shared, empty appearance is used for every file attachment
            // annotation; create it on first use.
            if (this._embeddedFileAppearance == null) {
                this._embeddedFileAppearance = createFileEmbedLinkAppearance();
            }

            // Cache only once the shared appearance exists, so the reuse
            // branch above never hands out a null appearance.
            this._embeddedFiles.put(uri, fs);

            PDAnnotationFileAttachment annotationFileAttachment = new PDAnnotationFileAttachment();

            annotationFileAttachment.setFile(fs);
            annotationFileAttachment.setAppearance(this._embeddedFileAppearance);

            // PDF/A3 requires we explicitly list this link as associated with file.
            if (elem.hasAttribute("relationship")) {
                COSArray fileRefArray = new COSArray();
                fileRefArray.add(fs);

                annotationFileAttachment.getCOSObject().setItem(COSName.getPDFName("AF"), fileRefArray);
            }

            return new AnnotationContainer.PDAnnotationFileAttachmentContainer(annotationFileAttachment);
        } catch (IOException e) {
            XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.EXCEPTION_COULD_NOT_LOAD_EMBEDDED_FILE, uri, e);
        }
    } else {
        XRLog.log(Level.WARNING, LogMessageId.LogMessageId1Param.LOAD_COULD_NOT_LOAD_EMBEDDED_FILE, uri);
    }

    return null;
}

/**
 * Builds an appearance dictionary whose normal appearance is an empty
 * stream, hiding the pin icon that various PDF readers use to signal
 * an embedded file.
 *
 * @return the appearance dictionary with an empty normal appearance
 */
private PDAppearanceDictionary createFileEmbedLinkAppearance() {
    PDAppearanceDictionary hiddenIcon = new PDAppearanceDictionary();

    // An appearance stream with empty resources and no content draws nothing.
    PDAppearanceStream emptyStream = new PDAppearanceStream(_od.getWriter());
    emptyStream.setResources(new PDResources());

    hiddenIcon.setNormalAppearance(emptyStream);
    return hiddenIcon;
}

private static boolean isURI(String uri) {
try {
return URI.create(uri) != null;
Expand Down

0 comments on commit 9d890e0

Please sign in to comment.