Skip to content

Commit

Permalink
implement export of schema.org JSON-LD IQSS#3700
Browse files Browse the repository at this point in the history
  • Loading branch information
pdurbin committed Nov 22, 2017
1 parent caf6371 commit 8f52663
Show file tree
Hide file tree
Showing 4 changed files with 371 additions and 2 deletions.
9 changes: 7 additions & 2 deletions doc/sphinx-guides/source/admin/metadataexport.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@ Metadata Export
Automatic Exports
-----------------

Unlike in DVN v3, publishing a dataset in Dataverse 4 automaticalliy starts a metadata export job, that will run in the background, asynchronously. Once completed, it will make the dataset metadata exported and cached in all the supported formats (Dublin Core, Data Documentation Initiative (DDI), and native JSON). There is no need to run the export manually.
Publishing a dataset automatically starts a metadata export job that runs in the background, asynchronously. Once the job completes, the dataset metadata will have been exported and cached in all of the supported formats:

- Dublin Core
- Data Documentation Initiative (DDI)
- schema.org JSON-LD
- native JSON (Dataverse-specific)

A scheduled timer job that runs nightly will attempt to export any published datasets that, for whatever reason, haven't been exported yet. This timer is activated automatically whenever the application is deployed or restarted, so there is no need to start or configure it manually. (See the "Application Timers" section of this guide for more information.)

Expand All @@ -28,4 +33,4 @@ Note, that creating, modifying, or re-exporting an OAI set will also attempt to
Export Failures
---------------

An export batch job, whether started via the API, or by the application timer, will leave a detailed log in your configured logs directory. This is the same location where your main Glassfish server.log is found. The name of the log file is ``export_[timestamp].log`` - for example, *export_2016-08-23T03-35-23.log*. The log will contain the numbers of datasets processed successfully and those for which metadata export failed, with some information on the failures detected. Please attach this log file if you need to contact Dataverse support about metadata export problems.
An export batch job, whether started via the API or by the application timer, will leave a detailed log in your configured logs directory. This is the same location where your main Glassfish server.log is found. The name of the log file is ``export_[timestamp].log`` - for example, *export_2016-08-23T03-35-23.log*. The log will contain the number of datasets processed successfully and the number for which metadata export failed, with some information on the failures detected. Please attach this log file if you need to contact Dataverse support about metadata export problems.
1 change: 1 addition & 0 deletions src/main/java/Bundle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1106,6 +1106,7 @@ dataset.editBtn.itemLabel.deaccession=Deaccession Dataset
dataset.exportBtn=Export Metadata
dataset.exportBtn.itemLabel.ddi=DDI
dataset.exportBtn.itemLabel.dublinCore=Dublin Core
dataset.exportBtn.itemLabel.schemaDotOrg=schema.org JSON-LD
dataset.exportBtn.itemLabel.json=JSON
metrics.title=Metrics
metrics.title.tip=View more metrics information
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package edu.harvard.iq.dataverse.export;

import com.google.auto.service.AutoService;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.export.spi.Exporter;
import edu.harvard.iq.dataverse.util.BundleUtil;
import java.io.IOException;
import java.io.OutputStream;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.json.Json;
import javax.json.JsonObject;
import javax.json.JsonReader;

@AutoService(Exporter.class)
public class SchemaDotOrgExporter implements Exporter {

    private static final Logger logger = Logger.getLogger(SchemaDotOrgExporter.class.getCanonicalName());

    /**
     * Writes the schema.org JSON-LD representation of the given dataset
     * version to the supplied output stream, UTF-8 encoded.
     *
     * The JSON-LD string is produced by {@code DatasetVersion.getJsonLd()};
     * it is round-tripped through a {@code JsonReader} so that malformed
     * output fails here rather than being cached and served to clients.
     *
     * @param version the dataset version to export
     * @param json the dataset version as native JSON (unused by this exporter,
     *        which relies on {@code version.getJsonLd()} instead)
     * @param outputStream destination for the UTF-8 encoded JSON-LD
     * @throws ExportException declared by the Exporter SPI; I/O problems are
     *         currently logged rather than rethrown so that one bad dataset
     *         does not abort a whole export batch
     */
    @Override
    public void exportDataset(DatasetVersion version, JsonObject json, OutputStream outputStream) throws ExportException {
        String jsonLdAsString = version.getJsonLd();
        JsonObject jsonLd;
        // try-with-resources ensures the reader (and its underlying StringReader) is closed.
        try (JsonReader jsonReader = Json.createReader(new StringReader(jsonLdAsString))) {
            jsonLd = jsonReader.readObject();
        }
        try {
            // StandardCharsets.UTF_8 avoids the checked UnsupportedEncodingException
            // risked by the string-based getBytes("UTF8") form.
            outputStream.write(jsonLd.toString().getBytes(StandardCharsets.UTF_8));
            outputStream.flush();
        } catch (IOException ex) {
            logger.log(Level.SEVERE, "Error writing schema.org JSON-LD export", ex);
        }
    }

    /** Stable, machine-oriented identifier for this export format. */
    @Override
    public String getProviderName() {
        return "schema.org";
    }

    /** Human-readable label shown in the UI's "Export Metadata" menu. */
    @Override
    public String getDisplayName() {
        return BundleUtil.getStringFromBundle("dataset.exportBtn.itemLabel.schemaDotOrg");
    }

    /** This exporter emits JSON-LD, not XML. */
    @Override
    public Boolean isXMLFormat() {
        return false;
    }

    @Override
    public Boolean isHarvestable() {
        // Defer harvesting because the current effort was estimated as a "2": https://github.com/IQSS/dataverse/issues/3700
        return false;
    }

    /** The export is publicly downloadable from the dataset page. */
    @Override
    public Boolean isAvailableToUsers() {
        return true;
    }

    @Override
    public String getXMLNameSpace() throws ExportException {
        throw new ExportException(SchemaDotOrgExporter.class.getSimpleName() + ": not an XML format.");
    }

    @Override
    public String getXMLSchemaLocation() throws ExportException {
        throw new ExportException(SchemaDotOrgExporter.class.getSimpleName() + ": not an XML format.");
    }

    @Override
    public String getXMLSchemaVersion() throws ExportException {
        throw new ExportException(SchemaDotOrgExporter.class.getSimpleName() + ": not an XML format.");
    }

    @Override
    public void setParam(String name, Object value) {
        // This exporter doesn't need/doesn't currently take any parameters.
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
package edu.harvard.iq.dataverse.export;

import edu.harvard.iq.dataverse.ControlledVocabularyValue;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.Dataverse;
import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL;
import edu.harvard.iq.dataverse.util.json.JsonParser;
import edu.harvard.iq.dataverse.util.json.JsonUtil;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.StringReader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;
import javax.json.Json;
import javax.json.JsonObject;
import javax.json.JsonReader;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;

/**
 * Unit tests for {@link SchemaDotOrgExporter}, covering both the JSON-LD
 * export of a sample dataset and the simple SPI accessor methods.
 */
public class SchemaDotOrgExporterTest {

    private final SchemaDotOrgExporter schemaDotOrgExporter;
    DDIExporterTest.MockDatasetFieldSvc datasetFieldTypeSvc = null;

    public SchemaDotOrgExporterTest() {
        schemaDotOrgExporter = new SchemaDotOrgExporter();
    }

    /**
     * Builds the mock dataset field service with just enough field types
     * (title, author, contact, description, keyword, subject, etc.) for
     * dataset-finch1.json to parse.
     */
    @Before
    public void setUp() {
        datasetFieldTypeSvc = new DDIExporterTest.MockDatasetFieldSvc();

        DatasetFieldType titleType = datasetFieldTypeSvc.add(new DatasetFieldType("title", DatasetFieldType.FieldType.TEXTBOX, false));
        DatasetFieldType authorType = datasetFieldTypeSvc.add(new DatasetFieldType("author", DatasetFieldType.FieldType.TEXT, true));
        Set<DatasetFieldType> authorChildTypes = new HashSet<>();
        authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorName", DatasetFieldType.FieldType.TEXT, false)));
        authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorAffiliation", DatasetFieldType.FieldType.TEXT, false)));
        authorChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifier", DatasetFieldType.FieldType.TEXT, false)));
        DatasetFieldType authorIdentifierSchemeType = datasetFieldTypeSvc.add(new DatasetFieldType("authorIdentifierScheme", DatasetFieldType.FieldType.TEXT, false));
        authorIdentifierSchemeType.setAllowControlledVocabulary(true);
        authorIdentifierSchemeType.setControlledVocabularyValues(Arrays.asList(
                // NOTE(review): these values look like placeholders — should they be ORCID, etc.? Confirm.
                new ControlledVocabularyValue(1L, "ark", authorIdentifierSchemeType),
                new ControlledVocabularyValue(2L, "doi", authorIdentifierSchemeType),
                new ControlledVocabularyValue(3L, "url", authorIdentifierSchemeType)
        ));
        authorChildTypes.add(datasetFieldTypeSvc.add(authorIdentifierSchemeType));
        for (DatasetFieldType t : authorChildTypes) {
            t.setParentDatasetFieldType(authorType);
        }
        authorType.setChildDatasetFieldTypes(authorChildTypes);

        DatasetFieldType datasetContactType = datasetFieldTypeSvc.add(new DatasetFieldType("datasetContact", DatasetFieldType.FieldType.TEXT, true));
        Set<DatasetFieldType> datasetContactTypes = new HashSet<>();
        datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactEmail", DatasetFieldType.FieldType.TEXT, false)));
        datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactName", DatasetFieldType.FieldType.TEXT, false)));
        datasetContactTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("datasetContactAffiliation", DatasetFieldType.FieldType.TEXT, false)));
        for (DatasetFieldType t : datasetContactTypes) {
            t.setParentDatasetFieldType(datasetContactType);
        }
        datasetContactType.setChildDatasetFieldTypes(datasetContactTypes);

        DatasetFieldType dsDescriptionType = datasetFieldTypeSvc.add(new DatasetFieldType("dsDescription", DatasetFieldType.FieldType.TEXT, true));
        Set<DatasetFieldType> dsDescriptionTypes = new HashSet<>();
        dsDescriptionTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("dsDescriptionValue", DatasetFieldType.FieldType.TEXT, false)));
        for (DatasetFieldType t : dsDescriptionTypes) {
            t.setParentDatasetFieldType(dsDescriptionType);
        }
        dsDescriptionType.setChildDatasetFieldTypes(dsDescriptionTypes);

        DatasetFieldType keywordType = datasetFieldTypeSvc.add(new DatasetFieldType("keyword", DatasetFieldType.FieldType.TEXT, true));
        DatasetFieldType descriptionType = datasetFieldTypeSvc.add(new DatasetFieldType("description", DatasetFieldType.FieldType.TEXTBOX, false));

        DatasetFieldType subjectType = datasetFieldTypeSvc.add(new DatasetFieldType("subject", DatasetFieldType.FieldType.TEXT, true));
        subjectType.setAllowControlledVocabulary(true);
        subjectType.setControlledVocabularyValues(Arrays.asList(
                new ControlledVocabularyValue(1L, "mgmt", subjectType),
                new ControlledVocabularyValue(2L, "law", subjectType),
                new ControlledVocabularyValue(3L, "cs", subjectType)
        ));

        DatasetFieldType pubIdType = datasetFieldTypeSvc.add(new DatasetFieldType("publicationIdType", DatasetFieldType.FieldType.TEXT, false));
        pubIdType.setAllowControlledVocabulary(true);
        pubIdType.setControlledVocabularyValues(Arrays.asList(
                new ControlledVocabularyValue(1L, "ark", pubIdType),
                new ControlledVocabularyValue(2L, "doi", pubIdType),
                new ControlledVocabularyValue(3L, "url", pubIdType)
        ));

        DatasetFieldType compoundSingleType = datasetFieldTypeSvc.add(new DatasetFieldType("coordinate", DatasetFieldType.FieldType.TEXT, true));
        Set<DatasetFieldType> childTypes = new HashSet<>();
        childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lat", DatasetFieldType.FieldType.TEXT, false)));
        childTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("lon", DatasetFieldType.FieldType.TEXT, false)));

        for (DatasetFieldType t : childTypes) {
            t.setParentDatasetFieldType(compoundSingleType);
        }
        compoundSingleType.setChildDatasetFieldTypes(childTypes);
    }

    /**
     * Test of exportDataset method, of class SchemaDotOrgExporter: parses a
     * sample dataset from JSON, exports it, and verifies the JSON-LD fields.
     */
    @Test
    public void testExportDataset() throws Exception {
        File datasetVersionJson = new File("src/test/resources/json/dataset-finch1.json");
        String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath())));

        JsonReader jsonReader1 = Json.createReader(new StringReader(datasetVersionAsJson));
        JsonObject json1 = jsonReader1.readObject();
        JsonParser jsonParser = new JsonParser(datasetFieldTypeSvc, null, null);
        DatasetVersion version = jsonParser.parseDatasetVersion(json1.getJsonObject("datasetVersion"));
        version.setVersionState(DatasetVersion.VersionState.RELEASED);
        SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd");
        Date publicationDate = dateFmt.parse("19551105");
        version.setReleaseTime(publicationDate);
        version.setVersionNumber(1L);
        // TODO: It might be nice to test TermsOfUseAndAccess some day
        version.setTermsOfUseAndAccess(null);
        Dataset dataset = new Dataset();
        dataset.setProtocol("doi");
        dataset.setAuthority("myAuthority");
        dataset.setIdentifier("myIdentifier");
        version.setDataset(dataset);
        Dataverse dataverse = new Dataverse();
        dataverse.setName("LibraScholar");
        dataset.setOwner(dataverse);
        // SITE_URL feeds the "includedInDataCatalog" url assertion below.
        System.setProperty(SITE_URL, "https://librascholar.org");

        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        schemaDotOrgExporter.exportDataset(version, json1, byteArrayOutputStream);
        String jsonLd = byteArrayOutputStream.toString();
        System.out.println("schema.org JSON-LD: " + JsonUtil.prettyPrint(jsonLd));
        JsonReader jsonReader2 = Json.createReader(new StringReader(jsonLd));
        JsonObject json2 = jsonReader2.readObject();
        assertEquals("http://schema.org", json2.getString("@context"));
        assertEquals("Dataset", json2.getString("@type"));
        assertEquals("http://dx.doi.org/myAuthority/myIdentifier", json2.getString("identifier"));
        assertEquals("Darwin's Finches", json2.getString("name"));
        assertEquals("Finch, Fiona", json2.getJsonArray("author").getJsonObject(0).getString("name"));
        assertEquals("Birds Inc.", json2.getJsonArray("author").getJsonObject(0).getString("affiliation"));
        assertEquals("1955-11-05", json2.getString("dateModified"));
        assertEquals("1", json2.getString("version"));
        assertEquals("Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.", json2.getString("description"));
        assertEquals("Medicine, Health and Life Sciences", json2.getJsonArray("keywords").getString(0));
        assertEquals("https://schema.org/version/3.3", json2.getString("schemaVersion"));
        assertEquals("DataCatalog", json2.getJsonObject("includedInDataCatalog").getString("@type"));
        assertEquals("LibraScholar", json2.getJsonObject("includedInDataCatalog").getString("name"));
        assertEquals("https://librascholar.org", json2.getJsonObject("includedInDataCatalog").getString("url"));
        assertEquals("Organization", json2.getJsonObject("provider").getString("@type"));
        assertEquals("Dataverse", json2.getJsonObject("provider").getString("name"));
    }

    /**
     * Test of getProviderName method, of class SchemaDotOrgExporter.
     */
    @Test
    public void testGetProviderName() {
        assertEquals("schema.org", schemaDotOrgExporter.getProviderName());
    }

    /**
     * Test of getDisplayName method, of class SchemaDotOrgExporter.
     */
    @Test
    public void testGetDisplayName() {
        assertEquals("schema.org JSON-LD", schemaDotOrgExporter.getDisplayName());
    }

    /**
     * Test of isXMLFormat method, of class SchemaDotOrgExporter.
     */
    @Test
    public void testIsXMLFormat() {
        assertEquals(false, schemaDotOrgExporter.isXMLFormat());
    }

    /**
     * Test of isHarvestable method, of class SchemaDotOrgExporter.
     */
    @Test
    public void testIsHarvestable() {
        assertEquals(false, schemaDotOrgExporter.isHarvestable());
    }

    /**
     * Test of isAvailableToUsers method, of class SchemaDotOrgExporter.
     */
    @Test
    public void testIsAvailableToUsers() {
        assertEquals(true, schemaDotOrgExporter.isAvailableToUsers());
    }

    /**
     * Test of getXMLNameSpace method, of class SchemaDotOrgExporter: the
     * exporter is not an XML format, so an ExportException is expected.
     */
    @Test
    public void testGetXMLNameSpace() throws Exception {
        try {
            schemaDotOrgExporter.getXMLNameSpace();
            fail("Expected an ExportException to be thrown");
        } catch (ExportException ex) {
            assertEquals(SchemaDotOrgExporter.class.getSimpleName() + ": not an XML format.", ex.getMessage());
        }
    }

    /**
     * Test of getXMLSchemaLocation method, of class SchemaDotOrgExporter: the
     * exporter is not an XML format, so an ExportException is expected.
     */
    @Test
    public void testGetXMLSchemaLocation() throws Exception {
        try {
            schemaDotOrgExporter.getXMLSchemaLocation();
            fail("Expected an ExportException to be thrown");
        } catch (ExportException ex) {
            assertEquals(SchemaDotOrgExporter.class.getSimpleName() + ": not an XML format.", ex.getMessage());
        }
    }

    /**
     * Test of getXMLSchemaVersion method, of class SchemaDotOrgExporter: the
     * exporter is not an XML format, so an ExportException is expected.
     */
    @Test
    public void testGetXMLSchemaVersion() throws Exception {
        try {
            schemaDotOrgExporter.getXMLSchemaVersion();
            fail("Expected an ExportException to be thrown");
        } catch (ExportException ex) {
            assertEquals(SchemaDotOrgExporter.class.getSimpleName() + ": not an XML format.", ex.getMessage());
        }
    }

    /**
     * Test of setParam method, of class SchemaDotOrgExporter: the exporter
     * takes no parameters, so this simply must not throw.
     */
    @Test
    public void testSetParam() {
        String name = "";
        Object value = null;
        schemaDotOrgExporter.setParam(name, value);
    }

}

0 comments on commit 8f52663

Please sign in to comment.