Skip to content

Commit

Permalink
#326 - We are now PDF/A1b, PDF/A2b and PDF/A2u compliant.
Browse files Browse the repository at this point in the history
Next up are the "a" variants (PDF/A-1a and PDF/A-2a).
  • Loading branch information
danfickle committed Mar 3, 2019
1 parent 41d5aa8 commit 20605f2
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
import java.util.stream.Collectors;

import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
Expand All @@ -31,7 +29,6 @@
import org.verapdf.pdfa.results.TestAssertion;
import org.verapdf.pdfa.results.ValidationResult;

import com.openhtmltopdf.pdfboxout.PdfBoxRenderer;
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder;
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder.PdfAConformance;

Expand Down Expand Up @@ -62,32 +59,22 @@ public boolean run(String resource, PDFAFlavour flavour, PdfAConformance conform

byte[] pdfBytes;

try (PDDocument doc = new PDDocument()) {
PdfRendererBuilder builder = new PdfRendererBuilder();
builder.usePDDocument(doc);
builder.useFastMode();
//builder.testMode(true);
builder.testMode(true);
builder.usePdfAConformance(conform);
builder.useFont(new File("target/test/artefacts/Karla-Bold.ttf"), "TestFont");
builder.withHtmlContent(html, PdfATester.class.getResource("/html/").toString());

try (PdfBoxRenderer renderer = builder.buildPdfRenderer()) {
renderer.createPDFWithoutClosing();
}

try (InputStream colorProfile = PdfATester.class.getResourceAsStream("/colorspaces/sRGB.icc")) {
PDOutputIntent oi = new PDOutputIntent(doc, colorProfile);
oi.setInfo("sRGB IEC61966-2.1");
oi.setOutputCondition("sRGB IEC61966-2.1");
oi.setOutputConditionIdentifier("sRGB IEC61966-2.1");
oi.setRegistryName("http://www.color.org");
doc.getDocumentCatalog().addOutputIntent(oi);
byte[] colorProfileBytes = IOUtils.toByteArray(colorProfile);
builder.useColorProfile(colorProfileBytes);
}

ByteArrayOutputStream baos = new ByteArrayOutputStream();
doc.save(baos);
builder.toStream(baos);
builder.run();
pdfBytes = baos.toByteArray();
}

Files.createDirectories(Paths.get("target/test/pdf/"));
Files.write(Paths.get("target/test/pdf/" + resource + "--" + flavour + ".pdf"), pdfBytes);
Expand All @@ -113,31 +100,32 @@ public boolean run(String resource, PDFAFlavour flavour, PdfAConformance conform
}
}

@Ignore // Failing, multiple. See issue number 326.
/**
* PDF/A conformance. Issue 326.
* NOTE: PDF/A1 standards do not support alpha in images.
*/
@Test
public void testAllInOnePdfA1b() throws Exception {
assertTrue(run("all-in-one", PDFAFlavour.PDFA_1_B, PdfAConformance.PDFA_1_B));
assertTrue(run("all-in-one-no-alpha", PDFAFlavour.PDFA_1_B, PdfAConformance.PDFA_1_B));
}

@Ignore
@Ignore // Failing because we haven't set up structure tagging for PDF/A1a standard.
@Test
public void testAllInOnePdfA1a() throws Exception {
assertTrue(run("all-in-one", PDFAFlavour.PDFA_1_A, PdfAConformance.PDFA_1_A));
assertTrue(run("all-in-one-no-alpha", PDFAFlavour.PDFA_1_A, PdfAConformance.PDFA_1_A));
}

@Ignore
@Test
public void testAllInOnePdfA2b() throws Exception {
assertTrue(run("all-in-one", PDFAFlavour.PDFA_2_B, PdfAConformance.PDFA_2_B));
}

@Ignore
@Ignore // Failing because we haven't set up structure tagging for PDF/A2a standard.
@Test
public void testAllInOnePdfA2a() throws Exception {
assertTrue(run("all-in-one", PDFAFlavour.PDFA_2_A, PdfAConformance.PDFA_2_A));
}

@Ignore
@Test
public void testAllInOnePdfA2u() throws Exception {
assertTrue(run("all-in-one", PDFAFlavour.PDFA_2_U, PdfAConformance.PDFA_2_U));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
<html lang="EN-US">
<head>
<title>All-in-one PDF/UA Testcase</title>
<meta name="subject" content="PDF/UA all-in-one"/>
<meta name="author" content="openhtmltopdf.com team"/>
<meta name="description" content="An example containing everything for easy testing"/>

<bookmarks>
<bookmark name="Simple Paragraphs" href="#para"/>
<bookmark name="Lists" href="#lists">
<bookmark name="Ordered" href="#ordered"/>
<bookmark name="Unordered" href="#unordered"/>
</bookmark>
<bookmark name="Images" href="#images"/>
<bookmark name="Links" href="#links"/>
<bookmark name="Tables" href="#tables"/>
<bookmark name="Backgrounds" href="#backgrounds"/>
<bookmark name="Conclusion" href="#conclusion"/>
</bookmarks>

<style>
@page {
margin: 30px 20px;

@top-center {
font-family: 'TestFont'; /* Font provided with builder. */
font-size: 16px;
color: blue;
content: "This is PDF/UA page " counter(page) " of " counter(pages) ".";
}
}
body {
margin: 0;
font-family: 'TestFont'; /* Font provided with builder. */
font-size: 15px;
}
</style>
</head>
<body style="">
<h1 id="title">All-in-one accessible (PDF/UA, Section 508, WCAG) PDF example</h1>

<h2 id="para">Simple paragraphs</h2>

<p>Paragraph one. Some text that goes over multiple lines. OK, this is getting to the required length. Need another sentence to get there in the end.</p>
<p>Paragraph two. Some text that goes over multiple lines. OK, this is getting to the required length. Need another sentence to get there in the end.</p>
<p>Paragraph three. Some text that goes over multiple lines. OK, this is getting to the required length. Need another sentence to get there in the end.</p>

<h2 id="lists">Lists</h2>

<h3 id="ordered">Ordered</h3>
<ol>
<li>One</li>
<li>Two</li>
<li>Three</li>
</ol>

<h3 id="unordered">Unordered</h3>
<ul>
<li>Bullet item one</li>
<li>And two</li>
<li>And three</li>
</ul>

<h2 id="images">Images</h2>
<img src="../images/flyingsaucer-no-alpha.png" alt="The FlyingSaucer logo. We should get our own!"/>

<h2 id="links">Links</h2>
<p>This is an external link to the project <a title="The openhtmltopdf.com homepage" href="https://openhtmltopdf.com">homepage</a>.</p>
<p>This is an internal link to the <a title="Go to top" href="#title">top</a> of the document.</p>

<h2 id="tables">Tables</h2>
<table>
<caption>Simple table example with fake data</caption>

<thead>
<tr><th>Col One</th><th>Col Two</th></tr>
</thead>

<tbody>
<tr><td>One</td><td>Two</td></tr>
<tr><td>Three</td><td>Four</td></tr>
<tr><td>Five</td><td>Six</td></tr>
</tbody>

<tfoot>
<tr><td>Footer1</td><td>Footer2</td></tr>
<tr><td>Footer3</td><td>Footer4</td></tr>
</tfoot>
</table>

<h2 id="backgrounds">Backgrounds</h2>
<div style="background-color: red; height: 40px; border-radius: 10px; border: 1px solid gray;">
<p>Some text on a background. Remember to use a good contrast if using background colors.</p>
</div>

<h2 id="conclusion">Conclusion</h2>
<p>Remember to keep it simple for PDF/UA compliance.</p>

</body>
</html>
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,11 @@ private boolean placeAnnotation(AffineTransform transform, Shape linkShape, Rect
PDAnnotationLink annot) {
annot.setRectangle(new PDRectangle((float) targetArea.getMinX(), (float) targetArea.getMinY(),
(float) targetArea.getWidth(), (float) targetArea.getHeight()));

// PDF/A standard requires the print flag to be set and there shouldn't
// be any harm in setting it for other documents.
annot.setPrinted(true);

if (linkShape != null) {
float[] quadPoints = mapShapeToQuadPoints(transform, linkShape, targetArea);
/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,13 @@ private PageState copy() {
private PdfBoxAccessibilityHelper _pdfUa;
private final boolean _pdfUaConform;

public PdfBoxFastOutputDevice(float dotsPerPoint, boolean testMode, boolean pdfUaConform) {
private final boolean _pdfAConform;

public PdfBoxFastOutputDevice(float dotsPerPoint, boolean testMode, boolean pdfUaConform, boolean pdfAConform) {
_dotsPerPoint = dotsPerPoint;
_testMode = testMode;
_pdfUaConform = pdfUaConform;
_pdfAConform = pdfAConform;
}

@Override
Expand Down Expand Up @@ -796,7 +799,10 @@ public void drawImage(FSImage fsImage, int x, int y, boolean interpolate) {

PDImageXObject xobject = img.getXObject();
if (interpolate) {
// PDF/A does not support setting the interpolate flag to true.
if (!_pdfAConform) {
xobject.setInterpolate(true);
}
} else {
/*
* Specialcase for not interpolating an image, default is to always interpolate.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDMarkInfo;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot;
import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent;
import org.apache.pdfbox.pdmodel.interactive.viewerpreferences.PDViewerPreferences;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.AdobePDFSchema;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.schema.XMPBasicSchema;
import org.apache.xmpbox.schema.XMPSchema;
Expand Down Expand Up @@ -150,7 +150,9 @@ public class PdfBoxRenderer implements Closeable {
_dotsPerPoint = DEFAULT_DOTS_PER_POINT;
_testMode = state._testMode;
_useFastMode = state._useFastRenderer;
_outputDevice = state._useFastRenderer ? new PdfBoxFastOutputDevice(DEFAULT_DOTS_PER_POINT, _testMode, state._pdfUaConform) : new PdfBoxSlowOutputDevice(DEFAULT_DOTS_PER_POINT, _testMode);
_outputDevice = state._useFastRenderer ?
new PdfBoxFastOutputDevice(DEFAULT_DOTS_PER_POINT, _testMode, state._pdfUaConform, state._pdfAConformance != PdfAConformance.NONE) :
new PdfBoxSlowOutputDevice(DEFAULT_DOTS_PER_POINT, _testMode);
_outputDevice.setWriter(_pdfDoc);
_outputDevice.setStartPageNo(_pdfDoc.getNumberOfPages());

Expand Down Expand Up @@ -754,36 +756,55 @@ private void addPdfASchema(PDDocument document, int part, String conformance) {
XMPMetadata metadata = XMPMetadata.createXMPMetadata();

try {
String title = information.getTitle();
String author = information.getAuthor();
String subject = information.getSubject();
String keywords = information.getKeywords();
String producer = information.getProducer();

// NOTE: The XMP metadata MUST match up with the document information dictionary
// to be a valid PDF/A document.

PDFAIdentificationSchema pdfaid = metadata.createAndAddPFAIdentificationSchema();
pdfaid.setConformance(conformance);
pdfaid.setPart(part);

AdobePDFSchema pdfSchema = metadata.createAndAddAdobePDFSchema();
pdfSchema.setProducer(information.getProducer());

if (keywords != null) {
pdfSchema.setKeywords(keywords);
}
if (producer != null) {
pdfSchema.setProducer(producer);
}

XMPBasicSchema xmpBasicSchema = metadata.createAndAddXMPBasicSchema();
xmpBasicSchema.setCreateDate(information.getCreationDate());


DublinCoreSchema dc = metadata.createAndAddDublinCoreSchema();
if (author != null) {
dc.addCreator(author);
}
if (title != null) {
dc.setTitle(title);
}
if (subject != null) {
dc.setDescription(subject);
}

PDMetadata metadataStream = new PDMetadata(document);
PDMarkInfo markInfo = new PDMarkInfo();
markInfo.setMarked(true);

// add to catalog
PDDocumentCatalog catalog = document.getDocumentCatalog();
catalog.setMetadata(metadataStream);
// For PDF/A-1 compliance, add the StructTreeRoot that https://www.pdf-online.com/osa/validate.aspx was
// complaining about. Based on https://stackoverflow.com/a/46806392
catalog.setStructureTreeRoot(new PDStructureTreeRoot());
//

catalog.setMarkInfo(markInfo);

XmpSerializer serializer = new XmpSerializer();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
serializer.serialize(metadata, baos, true);
metadataStream.importXMPMetadata( baos.toByteArray() );


if (_colorProfile != null) {
ByteArrayInputStream colorProfile = new ByteArrayInputStream(_colorProfile);
PDOutputIntent oi = new PDOutputIntent(document, colorProfile);
Expand Down

0 comments on commit 20605f2

Please sign in to comment.