From 8f526631f3f8aa3cf03223576b974088d4ed26a8 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 22 Nov 2017 11:06:41 -0500 Subject: [PATCH 1/8] implement export of schema.org JSON-LD #3700 --- .../source/admin/metadataexport.rst | 9 +- src/main/java/Bundle.properties | 1 + .../export/SchemaDotOrgExporter.java | 84 ++++++ .../export/SchemaDotOrgExporterTest.java | 279 ++++++++++++++++++ 4 files changed, 371 insertions(+), 2 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index 8c50ceacd84..a759754b385 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -7,7 +7,12 @@ Metadata Export Automatic Exports ----------------- -Unlike in DVN v3, publishing a dataset in Dataverse 4 automaticalliy starts a metadata export job, that will run in the background, asynchronously. Once completed, it will make the dataset metadata exported and cached in all the supported formats (Dublin Core, Data Documentation Initiative (DDI), and native JSON). There is no need to run the export manually. +Publishing a dataset automatically starts a metadata export job, that will run in the background, asynchronously. Once completed, it will make the dataset metadata exported and cached in all the supported formats: + +- Dublin Core +- Data Documentation Initiative (DDI) +- schema.org JSON-LD +- native JSON (Dataverse-specific) A scheduled timer job that runs nightly will attempt to export any published datasets that for whatever reason haven't been exported yet. This timer is activated automatically on the deployment, or restart, of the application. So, again, no need to start or configure it manually. (See the "Application Timers" section of this guide for more information) @@ -28,4 +33,4 @@ Note, that creating, modifying, or re-exporting an OAI set will also attempt to Export Failures --------------- -An export batch job, whether started via the API, or by the application timer, will leave a detailed log in your configured logs directory. This is the same location where your main Glassfish server.log is found. The name of the log file is ``export_[timestamp].log`` - for example, *export_2016-08-23T03-35-23.log*. The log will contain the numbers of datasets processed successfully and those for which metadata export failed, with some information on the failures detected. Please attach this log file if you need to contact Dataverse support about metadata export problems. \ No newline at end of file +An export batch job, whether started via the API, or by the application timer, will leave a detailed log in your configured logs directory. This is the same location where your main Glassfish server.log is found. The name of the log file is ``export_[timestamp].log`` - for example, *export_2016-08-23T03-35-23.log*. The log will contain the numbers of datasets processed successfully and those for which metadata export failed, with some information on the failures detected. Please attach this log file if you need to contact Dataverse support about metadata export problems. 
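The new format is exposed through the same metadata export API as the existing exporters (patch 6/8 below updates native-api.rst to add ``schema.org`` to the list of supported exporter names). A minimal sketch of retrieving it, assuming a published dataset identified by $persistentId:

    GET http://$SERVER/api/datasets/export?exporter=schema.org&persistentId=$persistentId

The exporter name in the query string is expected to match what SchemaDotOrgExporter.getProviderName() returns, which is "schema.org".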
diff --git a/src/main/java/Bundle.properties b/src/main/java/Bundle.properties index 4d7122f273c..039d857c8bf 100755 --- a/src/main/java/Bundle.properties +++ b/src/main/java/Bundle.properties @@ -1106,6 +1106,7 @@ dataset.editBtn.itemLabel.deaccession=Deaccession Dataset dataset.exportBtn=Export Metadata dataset.exportBtn.itemLabel.ddi=DDI dataset.exportBtn.itemLabel.dublinCore=Dublin Core +dataset.exportBtn.itemLabel.schemaDotOrg=schema.org JSON-LD dataset.exportBtn.itemLabel.json=JSON metrics.title=Metrics metrics.title.tip=View more metrics information diff --git a/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java new file mode 100644 index 00000000000..46fca3616ca --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java @@ -0,0 +1,84 @@ +package edu.harvard.iq.dataverse.export; + +import com.google.auto.service.AutoService; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.export.spi.Exporter; +import edu.harvard.iq.dataverse.util.BundleUtil; +import java.io.IOException; +import java.io.OutputStream; +import java.io.StringReader; +import java.util.logging.Level; +import java.util.logging.Logger; +import javax.json.Json; +import javax.json.JsonObject; +import javax.json.JsonReader; + +@AutoService(Exporter.class) +public class SchemaDotOrgExporter implements Exporter { + + @Override + public void exportDataset(DatasetVersion version, JsonObject json, OutputStream outputStream) throws ExportException { +// JsonObject json2 = Json.createObjectBuilder().add("foo", "bar").build(); + String jsonLdAsString = version.getJsonLd(); + StringReader foo = new StringReader(jsonLdAsString); + JsonReader bar = Json.createReader(foo); + JsonObject json2 = bar.readObject(); + try { + outputStream.write(json2.toString().getBytes("UTF8")); + } catch (IOException ex) { + Logger.getLogger(SchemaDotOrgExporter.class.getName()).log(Level.SEVERE, null, ex); + } + try { + outputStream.flush(); + } catch (IOException ex) { + Logger.getLogger(SchemaDotOrgExporter.class.getName()).log(Level.SEVERE, null, ex); + } + } + + @Override + public String getProviderName() { + return "schema.org"; + } + + @Override + public String getDisplayName() { + return BundleUtil.getStringFromBundle("dataset.exportBtn.itemLabel.schemaDotOrg"); + } + + @Override + public Boolean isXMLFormat() { + return false; + } + + @Override + public Boolean isHarvestable() { + // Defer harvesting because the current effort was estimated as a "2": https://github.com/IQSS/dataverse/issues/3700 + return false; + } + + @Override + public Boolean isAvailableToUsers() { + return true; + } + + @Override + public String getXMLNameSpace() throws ExportException { + throw new ExportException(SchemaDotOrgExporter.class.getSimpleName() + ": not an XML format."); + } + + @Override + public String getXMLSchemaLocation() throws ExportException { + throw new ExportException(SchemaDotOrgExporter.class.getSimpleName() + ": not an XML format."); + } + + @Override + public String getXMLSchemaVersion() throws ExportException { + throw new ExportException(SchemaDotOrgExporter.class.getSimpleName() + ": not an XML format."); + } + + @Override + public void setParam(String name, Object value) { + // this exporter doesn't need/doesn't currently take any parameters + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java 
b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java new file mode 100644 index 00000000000..0a674960b2a --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -0,0 +1,279 @@ +package edu.harvard.iq.dataverse.export; + +import edu.harvard.iq.dataverse.ControlledVocabularyValue; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.Dataverse; +import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; +import edu.harvard.iq.dataverse.util.json.JsonParser; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.StringReader; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Date; +import java.util.HashSet; +import java.util.Set; +import javax.json.Json; +import javax.json.JsonObject; +import javax.json.JsonReader; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import static org.junit.Assert.*; + +public class SchemaDotOrgExporterTest { + + private final SchemaDotOrgExporter schemaDotOrgExporter; + DDIExporterTest.MockDatasetFieldSvc datasetFieldTypeSvc = null; + + public SchemaDotOrgExporterTest() { + schemaDotOrgExporter = new SchemaDotOrgExporter(); + } + + @BeforeClass + public static void setUpClass() { + } + + @AfterClass + public static void tearDownClass() { + } + + @Before + public void setUp() { + datasetFieldTypeSvc = new DDIExporterTest.MockDatasetFieldSvc(); + + DatasetFieldType titleType = datasetFieldTypeSvc.add(new DatasetFieldType("title", DatasetFieldType.FieldType.TEXTBOX, false)); + DatasetFieldType authorType = datasetFieldTypeSvc.add(new DatasetFieldType("author", DatasetFieldType.FieldType.TEXT, true)); + Set authorChildTypes = new HashSet<>(); + authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorName", DatasetFieldType.FieldType.TEXT, false))); + authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorAffiliation", DatasetFieldType.FieldType.TEXT, false))); + authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifier", DatasetFieldType.FieldType.TEXT, false))); + DatasetFieldType authorIdentifierSchemeType = datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifierScheme", DatasetFieldType.FieldType.TEXT, false)); + authorIdentifierSchemeType.setAllowControlledVocabulary(true); + authorIdentifierSchemeType.setControlledVocabularyValues(Arrays.asList( + // Why aren't these enforced? Should be ORCID, etc. 
+ new ControlledVocabularyValue(1l, "ark", authorIdentifierSchemeType), + new ControlledVocabularyValue(2l, "doi", authorIdentifierSchemeType), + new ControlledVocabularyValue(3l, "url", authorIdentifierSchemeType) + )); + authorChildTypes.add(datasetFieldTypeSvc.add(authorIdentifierSchemeType)); + for (DatasetFieldType t : authorChildTypes) { + t.setParentDatasetFieldType(authorType); + } + authorType.setChildDatasetFieldTypes(authorChildTypes); + + DatasetFieldType datasetContactType = datasetFieldTypeSvc.add(new DatasetFieldType("datasetContact", DatasetFieldType.FieldType.TEXT, true)); + Set datasetContactTypes = new HashSet<>(); + datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactEmail", DatasetFieldType.FieldType.TEXT, false))); + datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactName", DatasetFieldType.FieldType.TEXT, false))); + datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactAffiliation", DatasetFieldType.FieldType.TEXT, false))); + for (DatasetFieldType t : datasetContactTypes) { + t.setParentDatasetFieldType(datasetContactType); + } + datasetContactType.setChildDatasetFieldTypes(datasetContactTypes); + + DatasetFieldType dsDescriptionType = datasetFieldTypeSvc.add(new DatasetFieldType("dsDescription", DatasetFieldType.FieldType.TEXT, true)); + Set dsDescriptionTypes = new HashSet<>(); + dsDescriptionTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("dsDescriptionValue", DatasetFieldType.FieldType.TEXT, false))); + for (DatasetFieldType t : dsDescriptionTypes) { + t.setParentDatasetFieldType(dsDescriptionType); + } + dsDescriptionType.setChildDatasetFieldTypes(dsDescriptionTypes); + + DatasetFieldType keywordType = datasetFieldTypeSvc.add(new DatasetFieldType("keyword", DatasetFieldType.FieldType.TEXT, true)); + DatasetFieldType descriptionType = datasetFieldTypeSvc.add(new DatasetFieldType("description", DatasetFieldType.FieldType.TEXTBOX, false)); + + DatasetFieldType subjectType = datasetFieldTypeSvc.add(new DatasetFieldType("subject", DatasetFieldType.FieldType.TEXT, true)); + subjectType.setAllowControlledVocabulary(true); + subjectType.setControlledVocabularyValues(Arrays.asList( + new ControlledVocabularyValue(1l, "mgmt", subjectType), + new ControlledVocabularyValue(2l, "law", subjectType), + new ControlledVocabularyValue(3l, "cs", subjectType) + )); + + DatasetFieldType pubIdType = datasetFieldTypeSvc.add(new DatasetFieldType("publicationIdType", DatasetFieldType.FieldType.TEXT, false)); + pubIdType.setAllowControlledVocabulary(true); + pubIdType.setControlledVocabularyValues(Arrays.asList( + new ControlledVocabularyValue(1l, "ark", pubIdType), + new ControlledVocabularyValue(2l, "doi", pubIdType), + new ControlledVocabularyValue(3l, "url", pubIdType) + )); + + DatasetFieldType compoundSingleType = datasetFieldTypeSvc.add(new DatasetFieldType("coordinate", DatasetFieldType.FieldType.TEXT, true)); + Set childTypes = new HashSet<>(); + childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lat", DatasetFieldType.FieldType.TEXT, false))); + childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lon", DatasetFieldType.FieldType.TEXT, false))); + + for (DatasetFieldType t : childTypes) { + t.setParentDatasetFieldType(compoundSingleType); + } + compoundSingleType.setChildDatasetFieldTypes(childTypes); + } + + @After + public void tearDown() { + } + + /** + * Test of exportDataset method, of class SchemaDotOrgExporter. 
+ */ + @Test + public void testExportDataset() throws Exception { + System.out.println("exportDataset"); + File datasetVersionJson = new File("src/test/resources/json/dataset-finch1.json"); + String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); + + JsonReader jsonReader1 = Json.createReader(new StringReader(datasetVersionAsJson)); + JsonObject json1 = jsonReader1.readObject(); + JsonParser jsonParser = new JsonParser(datasetFieldTypeSvc, null, null); + DatasetVersion version = jsonParser.parseDatasetVersion(json1.getJsonObject("datasetVersion")); + version.setVersionState(DatasetVersion.VersionState.RELEASED); + SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd"); + Date publicationDate = dateFmt.parse("19551105"); + version.setReleaseTime(publicationDate); + version.setVersionNumber(1l); + // TODO: It might be nice to test TermsOfUseAndAccess some day + version.setTermsOfUseAndAccess(null); + Dataset dataset = new Dataset(); + dataset.setProtocol("doi"); + dataset.setAuthority("myAuthority"); + dataset.setIdentifier("myIdentifier"); + version.setDataset(dataset); + Dataverse dataverse = new Dataverse(); + dataverse.setName("LibraScholar"); + dataset.setOwner(dataverse); + System.setProperty(SITE_URL, "https://librascholar.org"); + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + schemaDotOrgExporter.exportDataset(version, json1, byteArrayOutputStream); + String jsonLd = byteArrayOutputStream.toString(); + System.out.println("schema.org JSON-LD: " + JsonUtil.prettyPrint(jsonLd)); + JsonReader jsonReader2 = Json.createReader(new StringReader(jsonLd)); + JsonObject json2 = jsonReader2.readObject(); + assertEquals("http://schema.org", json2.getString("@context")); + assertEquals("Dataset", json2.getString("@type")); + assertEquals("http://dx.doi.org/myAuthority/myIdentifier", json2.getString("identifier")); + assertEquals("Darwin's Finches", json2.getString("name")); + assertEquals("Finch, Fiona", json2.getJsonArray("author").getJsonObject(0).getString("name")); + assertEquals("Birds Inc.", json2.getJsonArray("author").getJsonObject(0).getString("affiliation")); + assertEquals("1955-11-05", json2.getString("dateModified")); + assertEquals("1", json2.getString("version")); + assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", json2.getString("description")); + assertEquals("Medicine, Health and Life Sciences", json2.getJsonArray("keywords").getString(0)); + assertEquals("https://schema.org/version/3.3", json2.getString("schemaVersion")); + assertEquals("DataCatalog", json2.getJsonObject("includedInDataCatalog").getString("@type")); + assertEquals("LibraScholar", json2.getJsonObject("includedInDataCatalog").getString("name")); + assertEquals("https://librascholar.org", json2.getJsonObject("includedInDataCatalog").getString("url")); + assertEquals("Organization", json2.getJsonObject("provider").getString("@type")); + assertEquals("Dataverse", json2.getJsonObject("provider").getString("name")); + } + + /** + * Test of getProviderName method, of class SchemaDotOrgExporter. + */ + @Test + public void testGetProviderName() { + System.out.println("getProviderName"); + assertEquals("schema.org", schemaDotOrgExporter.getProviderName()); + } + + /** + * Test of getDisplayName method, of class SchemaDotOrgExporter. 
+ */ + @Test + public void testGetDisplayName() { + System.out.println("getDisplayName"); + assertEquals("schema.org JSON-LD", schemaDotOrgExporter.getDisplayName()); + } + + /** + * Test of isXMLFormat method, of class SchemaDotOrgExporter. + */ + @Test + public void testIsXMLFormat() { + System.out.println("isXMLFormat"); + assertEquals(false, schemaDotOrgExporter.isXMLFormat()); + } + + /** + * Test of isHarvestable method, of class SchemaDotOrgExporter. + */ + @Test + public void testIsHarvestable() { + System.out.println("isHarvestable"); + assertEquals(false, schemaDotOrgExporter.isHarvestable()); + } + + /** + * Test of isAvailableToUsers method, of class SchemaDotOrgExporter. + */ + @Test + public void testIsAvailableToUsers() { + System.out.println("isAvailableToUsers"); + assertEquals(true, schemaDotOrgExporter.isAvailableToUsers()); + } + + /** + * Test of getXMLNameSpace method, of class SchemaDotOrgExporter. + */ + @Test + public void testGetXMLNameSpace() throws Exception { + System.out.println("getXMLNameSpace"); + ExportException expectedException = null; + try { + String result = schemaDotOrgExporter.getXMLNameSpace(); + } catch (ExportException ex) { + expectedException = ex; + } + assertEquals(SchemaDotOrgExporter.class.getSimpleName() + ": not an XML format.", expectedException.getMessage()); + } + + /** + * Test of getXMLSchemaLocation method, of class SchemaDotOrgExporter. + */ + @Test + public void testGetXMLSchemaLocation() throws Exception { + System.out.println("getXMLSchemaLocation"); + ExportException expectedException = null; + try { + String result = schemaDotOrgExporter.getXMLSchemaLocation(); + } catch (ExportException ex) { + expectedException = ex; + } + assertEquals(SchemaDotOrgExporter.class.getSimpleName() + ": not an XML format.", expectedException.getMessage()); + } + + /** + * Test of getXMLSchemaVersion method, of class SchemaDotOrgExporter. + */ + @Test + public void testGetXMLSchemaVersion() throws Exception { + System.out.println("getXMLSchemaVersion"); + ExportException expectedException = null; + try { + String result = schemaDotOrgExporter.getXMLSchemaVersion(); + } catch (ExportException ex) { + expectedException = ex; + } + assertEquals(SchemaDotOrgExporter.class.getSimpleName() + ": not an XML format.", expectedException.getMessage()); + } + + /** + * Test of setParam method, of class SchemaDotOrgExporter. 
+ */ + @Test + public void testSetParam() { + System.out.println("setParam"); + String name = ""; + Object value = null; + schemaDotOrgExporter.setParam(name, value); + } + +} From b00d4d6f4c091aa68a816f06d3129322fc751122 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 22 Nov 2017 12:28:25 -0500 Subject: [PATCH 2/8] capitalize "Schema.org" #3700 --- src/main/java/Bundle.properties | 2 +- .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/Bundle.properties b/src/main/java/Bundle.properties index 039d857c8bf..fae4237f8ad 100755 --- a/src/main/java/Bundle.properties +++ b/src/main/java/Bundle.properties @@ -1106,7 +1106,7 @@ dataset.editBtn.itemLabel.deaccession=Deaccession Dataset dataset.exportBtn=Export Metadata dataset.exportBtn.itemLabel.ddi=DDI dataset.exportBtn.itemLabel.dublinCore=Dublin Core -dataset.exportBtn.itemLabel.schemaDotOrg=schema.org JSON-LD +dataset.exportBtn.itemLabel.schemaDotOrg=Schema.org JSON-LD dataset.exportBtn.itemLabel.json=JSON metrics.title=Metrics metrics.title.tip=View more metrics information diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index 0a674960b2a..ca90668c3c3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -190,7 +190,7 @@ public void testGetProviderName() { @Test public void testGetDisplayName() { System.out.println("getDisplayName"); - assertEquals("schema.org JSON-LD", schemaDotOrgExporter.getDisplayName()); + assertEquals("Schema.org JSON-LD", schemaDotOrgExporter.getDisplayName()); } /** From 2f278ccf473cff13de2e4627b15726ec2ba8e4ce Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 22 Nov 2017 12:33:56 -0500 Subject: [PATCH 3/8] cleanup #3700 --- .../dataverse/export/SchemaDotOrgExporter.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java index 46fca3616ca..c9f5395750c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java @@ -7,7 +7,6 @@ import java.io.IOException; import java.io.OutputStream; import java.io.StringReader; -import java.util.logging.Level; import java.util.logging.Logger; import javax.json.Json; import javax.json.JsonObject; @@ -16,22 +15,23 @@ @AutoService(Exporter.class) public class SchemaDotOrgExporter implements Exporter { + private static final Logger logger = Logger.getLogger(SchemaDotOrgExporter.class.getCanonicalName()); + @Override public void exportDataset(DatasetVersion version, JsonObject json, OutputStream outputStream) throws ExportException { -// JsonObject json2 = Json.createObjectBuilder().add("foo", "bar").build(); String jsonLdAsString = version.getJsonLd(); - StringReader foo = new StringReader(jsonLdAsString); - JsonReader bar = Json.createReader(foo); - JsonObject json2 = bar.readObject(); + StringReader stringReader = new StringReader(jsonLdAsString); + JsonReader jsonReader = Json.createReader(stringReader); + JsonObject jsonLdJsonObject = jsonReader.readObject(); try { - outputStream.write(json2.toString().getBytes("UTF8")); + 
outputStream.write(jsonLdJsonObject.toString().getBytes("UTF8")); } catch (IOException ex) { - Logger.getLogger(SchemaDotOrgExporter.class.getName()).log(Level.SEVERE, null, ex); + logger.info("IOException calling outputStream.write: " + ex); } try { outputStream.flush(); } catch (IOException ex) { - Logger.getLogger(SchemaDotOrgExporter.class.getName()).log(Level.SEVERE, null, ex); + logger.info("IOException calling outputStream.flush: " + ex); } } From 086824dc2651ccea389a83b8c5f91aa469c439c5 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 29 Nov 2017 10:57:32 -0500 Subject: [PATCH 4/8] note that we know "affliation" throws a warning #3700 --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 4865079430c..6b1936984ad 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1244,6 +1244,9 @@ public String getJsonLd() { // We are aware of "givenName" and "familyName" but instead of a person it might be an organization such as "Gallup Organization". //author.add("@type", "Person"); author.add("name", name); + // We are aware that the following error is thrown by https://search.google.com/structured-data/testing-tool + // "The property affiliation is not recognized by Google for an object of type Thing." + // Someone at Google has said this is ok. if (!StringUtil.isEmpty(affiliation)) { author.add("affiliation", affiliation); } From e5c2528a43575f14a685d69c4ce27f55bd771448 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 29 Nov 2017 12:11:17 -0500 Subject: [PATCH 5/8] capitalize Schema.org in guides #3700 --- doc/sphinx-guides/source/admin/metadataexport.rst | 2 +- .../harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index a759754b385..c6ebef0ce15 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -11,7 +11,7 @@ Publishing a dataset automatically starts a metadata export job, that will run i - Dublin Core - Data Documentation Initiative (DDI) -- schema.org JSON-LD +- Schema.org JSON-LD - native JSON (Dataverse-specific) A scheduled timer job that runs nightly will attempt to export any published datasets that for whatever reason haven't been exported yet. This timer is activated automatically on the deployment, or restart, of the application. So, again, no need to start or configure it manually. (See the "Application Timers" section of this guide for more information) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index ca90668c3c3..000aa642248 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -190,6 +190,7 @@ public void testGetProviderName() { @Test public void testGetDisplayName() { System.out.println("getDisplayName"); + // We capitalize "Schema.org" because it looks better in the dropdown list and it's what DataCite does in their UI. 
assertEquals("Schema.org JSON-LD", schemaDotOrgExporter.getDisplayName()); } From ba9c6bd138140862258c62cbbcbca7a701180b91 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 29 Nov 2017 12:28:16 -0500 Subject: [PATCH 6/8] API: document "schema.org" as a supported export format #3700 --- doc/sphinx-guides/source/api/native-api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 756966a610c..7b3659d31e3 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -152,7 +152,7 @@ Delete the dataset whose id is passed:: GET http://$SERVER/api/datasets/export?exporter=ddi&persistentId=$persistentId -.. note:: Supported exporters (export formats) are ``ddi``, ``oai_ddi``, ``dcterms``, ``oai_dc``, and ``dataverse_json``. +.. note:: Supported exporters (export formats) are ``ddi``, ``oai_ddi``, ``dcterms``, ``oai_dc``, ``schema.org`` , and ``dataverse_json``. |CORS| Lists all the file metadata, for the given dataset and version:: From 84224bd93a68bfa924d721b3b91996efd8701ce8 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 29 Nov 2017 12:45:53 -0500 Subject: [PATCH 7/8] guard against null terms.getTermsOfUse() #3700 --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 6b1936984ad..da9b765b465 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1344,7 +1344,11 @@ public String getJsonLd() { if (TermsOfUseAndAccess.License.CC0.equals(terms.getLicense())) { license.add("text", "CC0").add("url", "https://creativecommons.org/publicdomain/zero/1.0/"); } else { - license.add("text", terms.getTermsOfUse()); + String termsOfUse = terms.getTermsOfUse(); + // Terms of use can be null if you create the dataset with JSON. 
+ if (termsOfUse != null) { + license.add("text", termsOfUse); + } } job.add("license",license); From 3cc02d032a2a4607b2bcb4c59f4bab5ab37125b6 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 29 Nov 2017 12:53:04 -0500 Subject: [PATCH 8/8] have dataset page get cached JSON-LD, if available #3700 --- .../java/edu/harvard/iq/dataverse/DatasetPage.java | 11 ++++++++++- .../java/edu/harvard/iq/dataverse/DatasetVersion.java | 8 ++------ .../iq/dataverse/export/SchemaDotOrgExporter.java | 4 +++- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index aea585e084f..d83ca7a645c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -86,6 +86,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.ReturnDatasetToAuthorCommand; import edu.harvard.iq.dataverse.engine.command.impl.SubmitDatasetForReviewCommand; +import edu.harvard.iq.dataverse.export.SchemaDotOrgExporter; import java.util.Collections; import javax.faces.event.AjaxBehaviorEvent; @@ -4068,7 +4069,15 @@ public boolean isThisLatestReleasedVersion() { public String getJsonLd() { if (isThisLatestReleasedVersion()) { - return workingVersion.getJsonLd(); + ExportService instance = ExportService.getInstance(settingsService); + String jsonLd = instance.getExportAsString(dataset, SchemaDotOrgExporter.NAME); + if (jsonLd != null) { + logger.fine("Returning cached schema.org JSON-LD."); + return jsonLd; + } else { + logger.fine("No cached schema.org JSON-LD available. Going to the database."); + return workingVersion.getJsonLd(); + } } return ""; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index da9b765b465..a01422ac2da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1211,12 +1211,8 @@ public String getPublicationDateAsString() { return r; } - // TODO: Make this more performant by writing the output to the database or a file? - // Agree - now that this has grown into a somewhat complex chunk of formatted - // metadata - and not just a couple of values inserted into the page html - - // it feels like it would make more sense to treat it as another supported - // export format, that can be produced once and cached. - // The problem with that is that the export subsystem assumes there is only + // TODO: Consider moving this comment into the Exporter code. + // The export subsystem assumes there is only // one metadata export in a given format per dataset (it uses the current // released (published) version. This JSON fragment is generated for a // specific released version - and we can have multiple released versions. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java index c9f5395750c..e039407fcf2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporter.java @@ -17,6 +17,8 @@ public class SchemaDotOrgExporter implements Exporter { private static final Logger logger = Logger.getLogger(SchemaDotOrgExporter.class.getCanonicalName()); + public static final String NAME = "schema.org"; + @Override public void exportDataset(DatasetVersion version, JsonObject json, OutputStream outputStream) throws ExportException { String jsonLdAsString = version.getJsonLd(); @@ -37,7 +39,7 @@ public void exportDataset(DatasetVersion version, JsonObject json, OutputStream @Override public String getProviderName() { - return "schema.org"; + return NAME; } @Override
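Because the output is plain Schema.org JSON-LD, nothing Dataverse-specific is needed to consume it. The following is a hypothetical standalone reader, not part of this patch set: the file name is an assumption (an export saved from the API above), and it uses the same javax.json API that the exporter and its unit test use to pull out a few of the fields the test asserts on.

    import java.io.FileReader;
    import java.io.IOException;
    import javax.json.Json;
    import javax.json.JsonObject;
    import javax.json.JsonReader;

    public class SchemaDotOrgReader {

        public static void main(String[] args) throws IOException {
            // Hypothetical file holding the exporter's output, e.g. saved from
            // the export API; the name "schemaDotOrg-export.json" is an assumption.
            try (JsonReader reader = Json.createReader(new FileReader("schemaDotOrg-export.json"))) {
                JsonObject dataset = reader.readObject();
                // Fields the unit test above asserts on.
                System.out.println("context:    " + dataset.getString("@context"));   // http://schema.org
                System.out.println("type:       " + dataset.getString("@type"));      // Dataset
                System.out.println("identifier: " + dataset.getString("identifier"));
                System.out.println("name:       " + dataset.getString("name"));
                // Authors are a JSON array of objects, each with a "name" and,
                // when available, an "affiliation".
                dataset.getJsonArray("author").forEach(author ->
                        System.out.println("author:     " + ((JsonObject) author).getString("name")));
            }
        }
    }

A reader like this could be pointed at any installation's export endpoint once the exporter is deployed; the fields shown ("@context", "@type", "identifier", "name", "author") are the ones the SchemaDotOrgExporterTest above checks for.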