Skip to content

Commit

Permalink
PAGE 2019
Browse files Browse the repository at this point in the history
PAGE XML 2019-07-15
  • Loading branch information
chris1010010 committed Sep 3, 2019
1 parent 0eddb79 commit e209c9d
Show file tree
Hide file tree
Showing 15 changed files with 51 additions and 35 deletions.
4 changes: 2 additions & 2 deletions apidoc/allclasses-frame.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>All Classes (JPageConverter API)</title>
<meta name="date" content="2019-01-10">
<meta name="date" content="2019-09-03">
<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
<script type="text/javascript" src="script.js"></script>
</head>
Expand Down
4 changes: 2 additions & 2 deletions apidoc/allclasses-noframe.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>All Classes (JPageConverter API)</title>
<meta name="date" content="2019-01-10">
<meta name="date" content="2019-09-03">
<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
<script type="text/javascript" src="script.js"></script>
</head>
Expand Down
4 changes: 2 additions & 2 deletions apidoc/constant-values.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>Constant Field Values (JPageConverter API)</title>
<meta name="date" content="2019-01-10">
<meta name="date" content="2019-09-03">
<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
<script type="text/javascript" src="script.js"></script>
</head>
Expand Down
4 changes: 2 additions & 2 deletions apidoc/deprecated-list.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>Deprecated List (JPageConverter API)</title>
<meta name="date" content="2019-01-10">
<meta name="date" content="2019-09-03">
<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
<script type="text/javascript" src="script.js"></script>
</head>
Expand Down
4 changes: 2 additions & 2 deletions apidoc/help-doc.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>API Help (JPageConverter API)</title>
<meta name="date" content="2019-01-10">
<meta name="date" content="2019-09-03">
<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
<script type="text/javascript" src="script.js"></script>
</head>
Expand Down
6 changes: 3 additions & 3 deletions apidoc/index-all.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>Index (JPageConverter API)</title>
<meta name="date" content="2019-01-10">
<meta name="date" content="2019-09-03">
<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
<script type="text/javascript" src="script.js"></script>
</head>
Expand Down Expand Up @@ -103,7 +103,7 @@ <h2 class="title">P</h2>
</a>
<h2 class="title">R</h2>
<dl>
<dt><span class="memberNameLink"><a href="org/primaresearch/dla/page/converter/PageConverter.html#run-java.lang.String-java.lang.String-">run(String, String)</a></span> - Method in class org.primaresearch.dla.page.converter.<a href="org/primaresearch/dla/page/converter/PageConverter.html" title="class in org.primaresearch.dla.page.converter">PageConverter</a></dt>
<dt><span class="memberNameLink"><a href="org/primaresearch/dla/page/converter/PageConverter.html#run-java.lang.String-java.lang.String-boolean-">run(String, String, boolean)</a></span> - Method in class org.primaresearch.dla.page.converter.<a href="org/primaresearch/dla/page/converter/PageConverter.html" title="class in org.primaresearch.dla.page.converter">PageConverter</a></dt>
<dd>
<div class="block">Runs the conversion</div>
</dd>
Expand Down
2 changes: 1 addition & 1 deletion apidoc/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>JPageConverter API</title>
<script type="text/javascript">
tmpTargetPage = "" + window.location.search;
Expand Down
14 changes: 8 additions & 6 deletions apidoc/org/primaresearch/dla/page/converter/PageConverter.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>PageConverter (JPageConverter API)</title>
<meta name="date" content="2019-01-10">
<meta name="date" content="2019-09-03">
<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../script.js"></script>
</head>
Expand Down Expand Up @@ -158,8 +158,9 @@ <h3>Method Summary</h3>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/primaresearch/dla/page/converter/PageConverter.html#run-java.lang.String-java.lang.String-">run</a></span>(java.lang.String&nbsp;sourceFilename,
java.lang.String&nbsp;targetFilename)</code>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/primaresearch/dla/page/converter/PageConverter.html#run-java.lang.String-java.lang.String-boolean-">run</a></span>(java.lang.String&nbsp;sourceFilename,
java.lang.String&nbsp;targetFilename,
boolean&nbsp;json)</code>
<div class="block">Runs the conversion</div>
</td>
</tr>
Expand Down Expand Up @@ -243,14 +244,15 @@ <h4>main</h4>
</dl>
</li>
</ul>
<a name="run-java.lang.String-java.lang.String-">
<a name="run-java.lang.String-java.lang.String-boolean-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>run</h4>
<pre>public&nbsp;void&nbsp;run(java.lang.String&nbsp;sourceFilename,
java.lang.String&nbsp;targetFilename)</pre>
java.lang.String&nbsp;targetFilename,
boolean&nbsp;json)</pre>
<div class="block">Runs the conversion</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>Uses of Class org.primaresearch.dla.page.converter.PageConverter (JPageConverter API)</title>
<meta name="date" content="2019-01-10">
<meta name="date" content="2019-09-03">
<link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../../script.js"></script>
</head>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>org.primaresearch.dla.page.converter (JPageConverter API)</title>
<meta name="date" content="2019-01-10">
<meta name="date" content="2019-09-03">
<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../script.js"></script>
</head>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>org.primaresearch.dla.page.converter (JPageConverter API)</title>
<meta name="date" content="2019-01-10">
<meta name="date" content="2019-09-03">
<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../script.js"></script>
</head>
Expand Down
4 changes: 2 additions & 2 deletions apidoc/org/primaresearch/dla/page/converter/package-tree.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>org.primaresearch.dla.page.converter Class Hierarchy (JPageConverter API)</title>
<meta name="date" content="2019-01-10">
<meta name="date" content="2019-09-03">
<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../script.js"></script>
</head>
Expand Down
4 changes: 2 additions & 2 deletions apidoc/org/primaresearch/dla/page/converter/package-use.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>Uses of Package org.primaresearch.dla.page.converter (JPageConverter API)</title>
<meta name="date" content="2019-01-10">
<meta name="date" content="2019-09-03">
<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../script.js"></script>
</head>
Expand Down
4 changes: 2 additions & 2 deletions apidoc/overview-tree.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Thu Jan 10 16:46:09 GMT 2019 -->
<!-- Generated by javadoc (1.8.0_121) on Tue Sep 03 16:49:39 BST 2019 -->
<title>Class Hierarchy (JPageConverter API)</title>
<meta name="date" content="2019-01-10">
<meta name="date" content="2019-09-03">
<link rel="stylesheet" type="text/css" href="stylesheet.css" title="Style">
<script type="text/javascript" src="script.js"></script>
</head>
Expand Down
20 changes: 17 additions & 3 deletions src/org/primaresearch/dla/page/converter/PageConverter.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import java.io.File;

import org.primaresearch.dla.page.Page;
import org.primaresearch.dla.page.io.FileInput;
import org.primaresearch.dla.page.io.json.GoogleJsonPageReader;
import org.primaresearch.dla.page.io.xml.PageXmlInputOutput;
import org.primaresearch.dla.page.layout.physical.ContentObject;
import org.primaresearch.dla.page.layout.physical.ContentObjectProcessor;
Expand Down Expand Up @@ -59,12 +61,19 @@ public static void main(String[] args) {

//Parse arguments
String sourceFilename = null;
boolean json = false;
String targetFilename = null;
String gtsidPattern = null;
String textFilterRuleFile = null;
for (int i=0; i<args.length; i++) {
if ("-source-xml".equals(args[i])) {
i++;
json = false;
sourceFilename = args[i];
}
else if ("-source-json".equals(args[i])) {
i++;
json = true;
sourceFilename = args[i];
}
else if ("-target-xml".equals(args[i])) {
Expand Down Expand Up @@ -103,7 +112,7 @@ else if ("-text-filter".equals(args[i])) {
}

//Run conversion
converter.run(sourceFilename, targetFilename);
converter.run(sourceFilename, targetFilename, json);
}

/**
Expand All @@ -117,6 +126,8 @@ private static void showUsage() {
System.out.println("Arguments:");
System.out.println("");
System.out.println(" -source-xml <XML file> PAGE XML file to convert.");
System.out.println(" OR");
System.out.println(" -source-json <JSON file> JSON file to convert (e.g. Google Cloud Vision output).");
System.out.println("");
System.out.println(" -target-xml <XML file> Output PAGE XML file.");
System.out.println("");
Expand Down Expand Up @@ -144,11 +155,14 @@ private static void showUsage() {
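* @param sourceFilename File path of the input file (PAGE XML, or JSON if <code>json</code> is <code>true</code>)
* @param targetFilename File path to output PAGE XML
* @param json If <code>true</code>, the source file is read with GoogleJsonPageReader; otherwise it is read as PAGE XML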
*/
public void run(String sourceFilename, String targetFilename) {
public void run(String sourceFilename, String targetFilename, boolean json) {
//Load
Page page = null;
try {
page = PageXmlInputOutput.readPage(sourceFilename);
if (json)
page = new GoogleJsonPageReader().read(new FileInput(new File(sourceFilename)));
else //XML
page = PageXmlInputOutput.readPage(sourceFilename);
} catch (Exception e) {
System.err.println("Could not load source PAGE XML file: "+sourceFilename);
e.printStackTrace();
Expand Down

0 comments on commit e209c9d

Please sign in to comment.