Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RO-Crate exporter PoC #10086

Closed
16 changes: 11 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<skipUnitTests>false</skipUnitTests>
<skipIntegrationTests>false</skipIntegrationTests>
<it.groups>integration</it.groups>

<!-- By default, this module will produce a WAR file. -->
<!-- This will be switched within the container profile! -->
<packaging.type>war</packaging.type>
Expand Down Expand Up @@ -225,7 +225,7 @@
<artifactId>jakarta.json</artifactId>
<scope>provided</scope>
</dependency>

<!-- JSON-B -->
<dependency>
<groupId>jakarta.json.bind</groupId>
Expand All @@ -237,7 +237,7 @@
<artifactId>yasson</artifactId>
<scope>test</scope>
</dependency>

<!-- Jakarta Faces & related -->
<dependency>
<groupId>org.glassfish</groupId>
Expand Down Expand Up @@ -450,7 +450,7 @@
<artifactId>caffeine</artifactId>
<version>3.1.8</version>
</dependency>

<!-- New and Improved GDCC XOAI library! -->
<dependency>
<groupId>io.gdcc</groupId>
Expand Down Expand Up @@ -660,7 +660,7 @@
<version>4.0.4</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
Expand Down Expand Up @@ -690,6 +690,12 @@
<artifactId>hazelcast</artifactId>
<scope>test</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/edu.kit.datamanager/ro-crate-java -->
<dependency>
<groupId>edu.kit.datamanager</groupId>
<artifactId>ro-crate-java</artifactId>
<version>1.1.0-rc.1</version>
</dependency>
</dependencies>
<build>
<testResources>
Expand Down
98 changes: 98 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/export/RoCrateExporter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package edu.harvard.iq.dataverse.export;

import com.google.auto.service.AutoService;
import edu.harvard.iq.dataverse.DatasetServiceBean;
import edu.harvard.iq.dataverse.rocrate.RoCrateManager;
import edu.harvard.iq.dataverse.util.BundleUtil;
import io.gdcc.spi.export.ExportDataProvider;
import io.gdcc.spi.export.ExportException;
import io.gdcc.spi.export.Exporter;
import jakarta.ws.rs.core.MediaType;

import java.io.FileInputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Locale;
import java.util.Optional;
import jakarta.enterprise.inject.spi.CDI;

/**
* Exports the dataset metadata as RO-Crate JSON, using the metadatablocks of the dataset as the schema
* of the RO-Crate. This means that all data present in the dataset in Dataverse is exported as is, without
* mapping it to the Schema.org vocabulary, which is the default schema for RO-Crates.
*
* This class has been extracted from the ARP project (https://science-research-data.hu/en) in the frame of
* FAIR-IMPACT's 1st Open Call "Enabling FAIR Signposting and RO-Crate for content/metadata discovery and consumption".
*
* @author Balázs E. Pataki <balazs.pataki@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @author Norbert Finta <norbert.finta@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @version 1.0
*/
@AutoService(Exporter.class)
public class RoCrateExporter implements Exporter {

    /**
     * Writes the RO-Crate JSON file of the dataset version described by {@code exportDataProvider}
     * to {@code outputStream}. If the RO-Crate file does not exist yet at the path reported by the
     * {@link RoCrateManager}, it is created first.
     *
     * @param exportDataProvider supplies the JSON representation of the dataset; its {@code id} and
     *                           {@code datasetVersion.id} values are used to load the dataset version
     * @param outputStream       the stream the RO-Crate file content is copied to; it is not closed here
     * @throws ExportException if the dataset or dataset version cannot be found, if creating the
     *                         RO-Crate fails, or if the RO-Crate file cannot be copied to the stream
     */
    @Override
    public void exportDataset(ExportDataProvider exportDataProvider, OutputStream outputStream) throws ExportException {
        // The exporter is not loaded as an EJB by the ExporterService but is initialized directly, which means
        // we cannot inject EJBs with the usual @EJB annotation. Instead, we look them up at runtime.
        var roCrateManager = CDI.current().select(RoCrateManager.class).get();
        var datasetService = CDI.current().select(DatasetServiceBean.class).get();

        // exportDataProvider doesn't provide the Dataset object, only the JSON representation, so we read the
        // ID of the Dataset from the JSON and then load the Dataset via the datasetService.
        var json = exportDataProvider.getDatasetJson();
        var datasetId = Long.valueOf(json.get("id").toString());
        var ds = datasetService.find(datasetId);
        if (ds == null) {
            // Fail with the exporter's own exception type instead of a NullPointerException further down.
            throw new ExportException("Dataset with id " + datasetId + " not found");
        }

        // Get the version specified in the json. Note: it is actually always the latest version, one cannot
        // export metadata of older versions.
        var versionId = Long.valueOf(json.getJsonObject("datasetVersion").get("id").toString());
        var version = ds.getVersionFromId(versionId);
        if (version == null) {
            throw new ExportException("Dataset version with id " + versionId + " not found in dataset " + datasetId);
        }

        // Now we can create the RO-Crate using the roCrateManager, unless the file is already there.
        var roCrateFile = Paths.get(roCrateManager.getRoCratePath(version));
        if (!Files.exists(roCrateFile)) {
            try {
                roCrateManager.createOrUpdateRoCrate(version);
            } catch (Exception e) {
                // Keep the original message, but preserve the cause so the stack trace is not lost.
                throw new ExportException(e.getMessage(), e);
            }
        }

        try {
            // Streams the file content to the output; the output stream is left open for the caller.
            Files.copy(roCrateFile, outputStream);
        } catch (Exception e) {
            throw new ExportException(e.getMessage(), e);
        }
    }

    /**
     * @return the format name of this exporter, {@code "rocrate"}
     */
    @Override
    public String getFormatName() {
        return "rocrate";
    }

    /**
     * Looks up the display name under the bundle key {@code dataset.exportBtn.itemLabel.rocrate}.
     *
     * @param locale the locale passed to the bundle lookup
     * @return the bundle value, or {@code "RO-Crate"} when the lookup yields {@code null}
     */
    @Override
    public String getDisplayName(Locale locale) {
        String displayName = BundleUtil.getStringFromBundle("dataset.exportBtn.itemLabel.rocrate", locale);
        return Optional.ofNullable(displayName).orElse("RO-Crate");
    }

    /**
     * @return always {@code true}
     */
    @Override
    public Boolean isHarvestable() {
        return true;
    }

    /**
     * @return always {@code true}
     */
    @Override
    public Boolean isAvailableToUsers() {
        return true;
    }

    /**
     * @return {@link MediaType#APPLICATION_JSON}
     */
    @Override
    public String getMediaType() {
        return MediaType.APPLICATION_JSON;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package edu.harvard.iq.dataverse.rocrate;

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.MetadataBlock;
import edu.harvard.iq.dataverse.MetadataBlockServiceBean;
import edu.kit.datamanager.ro_crate.entities.data.RootDataEntity;
import jakarta.ejb.EJB;
import jakarta.ejb.Stateless;
import jakarta.inject.Named;

import java.util.List;
import java.util.stream.Collectors;

/**
* Manages RO-Crate conformsTo values based on Dataverse metadatablocks.
*
* This class has been extracted from the ARP project (https://science-research-data.hu/en) in the frame of
* FAIR-IMPACT's 1st Open Call "Enabling FAIR Signposting and RO-Crate for content/metadata discovery and consumption".
*
* @author Balázs E. Pataki <balazs.pataki@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @author Norbert Finta <norbert.finta@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @version 1.0
*/
@Stateless
@Named
public class DefaultRoCrateConformsToIdProvider implements RoCrateConformsToIdProvider {
    /** Prefix prepended to a metadatablock name to form its conformsTo ID. */
    private static final String ID_PREFIX = "https://dataverse.org/metadatablocks/";

    @EJB
    protected MetadataBlockServiceBean metadataBlockSvc;

    /**
     * Builds one conformsTo ID per metadatablock of the dataset's owner by prefixing the
     * metadatablock name with {@code ID_PREFIX}.
     *
     * @param dataset        the dataset whose owner's metadatablocks are used
     * @param rootDataEntity not used by this implementation
     * @return the conformsTo IDs, in the order the owner returns its metadatablocks
     */
    @Override
    public List<String> generateConformsToIds(Dataset dataset, RootDataEntity rootDataEntity) {
        return dataset.getOwner().getMetadataBlocks().stream()
                .map(metadataBlock -> ID_PREFIX + metadataBlock.getName())
                .collect(Collectors.toList());
    }

    /**
     * Resolves conformsTo IDs back to metadatablocks by stripping the first
     * {@code ID_PREFIX.length()} characters of each ID and looking up the remainder by name.
     *
     * NOTE(review): the prefix is removed by length only; it is not verified that an ID actually
     * starts with {@code ID_PREFIX}. IDs are assumed to come from {@link #generateConformsToIds} -
     * confirm against callers.
     *
     * @param ids conformsTo IDs to resolve
     * @return one lookup result per ID, in the same order as {@code ids}
     */
    @Override
    public List<MetadataBlock> findMetadataBlockForConformsToIds(List<String> ids) {
        return ids.stream()
                .map(id -> metadataBlockSvc.findByName(id.substring(ID_PREFIX.length())))
                .collect(Collectors.toList());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package edu.harvard.iq.dataverse.rocrate;

import edu.harvard.iq.dataverse.DatasetFieldCompoundValue;
import jakarta.ejb.Stateless;
import jakarta.inject.Named;

import java.util.stream.Collectors;

/**
* Helps to generate a value for the "name" field of RO-Crate objects for a Dataverse compound field. This
* RoCrateNameProvider implementation generates a string value similar to the one shown on the Metadata tab of the
* Dataverse dataset UI.
*
* This class has been extracted from the ARP project (https://science-research-data.hu/en) in the frame of
* FAIR-IMPACT's 1st Open Call "Enabling FAIR Signposting and RO-Crate for content/metadata discovery and consumption".
*
* @author Balázs E. Pataki <balazs.pataki@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @author Norbert Finta <norbert.finta@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @version 1.0
*/
@Stateless
@Named
public class DefaultRoCrateNameProvider implements RoCrateNameProvider {
    /**
     * Produces the RO-Crate "name" for a compound field value by joining the values of its
     * display value map with single spaces.
     *
     * @param compoundValue the compound field value whose display values are joined
     * @return the joined text; when it is longer than 80 characters, its first 77 characters
     *         followed by {@code "..."}
     */
    @Override
    public String generateRoCrateName(DatasetFieldCompoundValue compoundValue) {
        String joined = String.join(" ", compoundValue.getDisplayValueMap().values());

        // Short enough: use the joined text unchanged.
        if (joined.length() <= 80) {
            return joined;
        }

        // Too long: keep 77 characters and mark the cut with an ellipsis (80 characters in total).
        return joined.substring(0, 77) + "...";
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package edu.harvard.iq.dataverse.rocrate;

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.MetadataBlock;
import edu.kit.datamanager.ro_crate.entities.data.RootDataEntity;

import java.util.List;

/**
* Manages RO-Crate conformsTo values.
*
* This class has been extracted from the ARP project (https://science-research-data.hu/en) in the frame of
* FAIR-IMPACT's 1st Open Call "Enabling FAIR Signposting and RO-Crate for content/metadata discovery and consumption".
*
* @author Balázs E. Pataki <balazs.pataki@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @author Norbert Finta <norbert.finta@sztaki.hu>, SZTAKI, Department of Distributed Systems, https://dsd.sztaki.hu
* @version 1.0
*/

public interface RoCrateConformsToIdProvider {
    /**
     * Generates the conformsTo IDs for the given dataset.
     *
     * @param dataset        the dataset to generate the conformsTo IDs for
     * @param rootDataEntity the RO-Crate root data entity; presumably the root entity of the
     *                       dataset's RO-Crate - TODO confirm against callers
     * @return the list of conformsTo IDs
     */
    List<String> generateConformsToIds(Dataset dataset, RootDataEntity rootDataEntity);

    /**
     * Finds the metadatablocks that belong to the given conformsTo IDs.
     *
     * @param ids the conformsTo IDs to resolve; presumably values previously produced by
     *            {@link #generateConformsToIds} - TODO confirm against callers
     * @return the list of metadatablocks found for the IDs
     */
    List<MetadataBlock> findMetadataBlockForConformsToIds(List<String> ids);
}
Loading
Loading