diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java
index 342aaec187a..db5f9d172cd 100644
--- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java
+++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java
@@ -5,6 +5,8 @@
 import edu.harvard.iq.dataverse.dataaccess.DataAccess;
 import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch;
 import edu.harvard.iq.dataverse.util.BundleUtil;
+import edu.harvard.iq.dataverse.util.SystemConfig;
+
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -765,4 +767,15 @@ public boolean isAncestorOf( DvObject other ) {
         }
         return false;
     }
+
+    /**
+     * Returns the address of this collection on the local installation: the
+     * value of SystemConfig.getDataverseSiteUrlStatic() followed by
+     * "/dataverse/" and this collection's alias.
+     *
+     * @return the local URL of this collection as a String
+     */
+    public String getLocalURL() {
+        return SystemConfig.getDataverseSiteUrlStatic() + "/dataverse/" + this.getAlias();
+    }
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java
index bacd82daa0d..ed24ab0330b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java
+++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java
@@ -254,4 +254,34 @@ public static boolean verifyImportCharacters(String pidParam) {
 
         return m.matches();
     }
+
+    /**
+     * Convenience method to get the internal form of a PID string when it may
+     * be in the https:// or http:// resolver URL form. Only a leading resolver
+     * URL is rewritten; any other string is returned unchanged.
+     *
+     * TODO: refactor this class to allow creating a GlobalId from any form
+     * (which assures it has valid syntax) and then have methods to get the form
+     * you want.
+     *
+     * @param pidUrlString - a string assumed to be a valid PID in some form
+     * @return the internal form as a String
+     */
+    public static String getInternalFormOfPID(String pidUrlString) {
+        // Swap only the matched leading resolver URL for "<protocol>:";
+        // String.replace would also rewrite later occurrences in the identifier.
+        if (pidUrlString.startsWith(GlobalId.DOI_RESOLVER_URL)) {
+            return GlobalId.DOI_PROTOCOL + ":" + pidUrlString.substring(GlobalId.DOI_RESOLVER_URL.length());
+        }
+        if (pidUrlString.startsWith(GlobalId.HDL_RESOLVER_URL)) {
+            return GlobalId.HDL_PROTOCOL + ":" + pidUrlString.substring(GlobalId.HDL_RESOLVER_URL.length());
+        }
+        if (pidUrlString.startsWith(GlobalId.HTTP_DOI_RESOLVER_URL)) {
+            return GlobalId.DOI_PROTOCOL + ":" + pidUrlString.substring(GlobalId.HTTP_DOI_RESOLVER_URL.length());
+        }
+        if (pidUrlString.startsWith(GlobalId.HTTP_HDL_RESOLVER_URL)) {
+            return GlobalId.HDL_PROTOCOL + ":" + pidUrlString.substring(GlobalId.HTTP_HDL_RESOLVER_URL.length());
+        }
+        return pidUrlString;
+    }
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java
index 56676e3d00a..f45ced9f409 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java
@@ -72,6 +72,7 @@
 
 import edu.harvard.iq.dataverse.DataFile;
 import edu.harvard.iq.dataverse.DataFile.ChecksumType;
+import edu.harvard.iq.dataverse.GlobalId;
 import edu.harvard.iq.dataverse.util.json.JsonLDTerm;
 
 public class BagGenerator {
@@ -204,7 +205,10 @@ public boolean generateBag(OutputStream outputStream) throws Exception {
         // The oremapObject is javax.json.JsonObject and we need com.google.gson.JsonObject for the aggregation object
         aggregation = (JsonObject) new JsonParser().parse(oremapObject.getJsonObject(JsonLDTerm.ore("describes").getLabel()).toString());
 
-        bagID = aggregation.get("@id").getAsString() + "v."
+        // The aggregation "@id" may be in resolver-URL form; the bag ID is built from the internal PID form
+        String pidUrlString = aggregation.get("@id").getAsString();
+        String pidString = GlobalId.getInternalFormOfPID(pidUrlString);
+        bagID = pidString + "v."
                 + aggregation.get(JsonLDTerm.schemaOrg("version").getLabel()).getAsString();
 
         logger.info("Generating Bag: " + bagID);
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java
index 19d9c2931ae..a295f264d66 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java
@@ -8,6 +8,7 @@
 import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
 import edu.harvard.iq.dataverse.DatasetFieldType;
 import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.Dataverse;
 import edu.harvard.iq.dataverse.DvObjectContainer;
 import edu.harvard.iq.dataverse.FileMetadata;
 import edu.harvard.iq.dataverse.TermsOfUseAndAccess;
@@ -86,7 +87,7 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except
         localContext.putIfAbsent(JsonLDNamespace.schema.getPrefix(), JsonLDNamespace.schema.getUrl());
 
         Dataset dataset = version.getDataset();
-        String id = dataset.getGlobalId().asString();
+        String id = dataset.getGlobalId().toURL().toExternalForm();
         JsonArrayBuilder fileArray = Json.createArrayBuilder();
         // The map describes an aggregation
         JsonObjectBuilder aggBuilder = Json.createObjectBuilder();
@@ -214,7 +215,9 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except
         }
 
         aggBuilder.add(JsonLDTerm.schemaOrg("includedInDataCatalog").getLabel(),
-                BrandingUtil.getRootDataverseCollectionName());
+                BrandingUtil.getInstallationBrandName());
+
+        aggBuilder.add(JsonLDTerm.schemaOrg("isPartOf").getLabel(), getDataverseDescription(dataset.getOwner()));
         String mdl = dataset.getMetadataLanguage();
         if(!mdl.equals(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE)) {
             aggBuilder.add(JsonLDTerm.schemaOrg("inLanguage").getLabel(), mdl);
@@ -320,6 +323,27 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except
         }
     }
 
+    /**
+     * Describes a collection as a JSON object holding its name, its local URL
+     * (as "@id") and, when non-null, its description. A collection that has an
+     * owner nests the owner's description under schema.org "isPartOf", recursively.
+     *
+     * @param dv the collection to describe
+     * @return a builder for the collection's JSON description
+     */
+    private JsonObjectBuilder getDataverseDescription(Dataverse dv) {
+        // Schema.org is already in local context, no updates needed as long as we only use schemaOrg and "@id" here
+        JsonObjectBuilder dvjob = Json.createObjectBuilder()
+                .add(JsonLDTerm.schemaOrg("name").getLabel(), dv.getCurrentName())
+                .add("@id", dv.getLocalURL());
+        addIfNotNull(dvjob, JsonLDTerm.schemaOrg("description"), dv.getDescription());
+        Dataverse owner = dv.getOwner();
+        if (owner != null) {
+            dvjob.add(JsonLDTerm.schemaOrg("isPartOf").getLabel(), getDataverseDescription(owner));
+        }
+        return dvjob;
+    }
+
     /*
      * Simple methods to only add an entry to JSON if the value of the term is
      * non-null. Methods created for string, JsonValue, boolean, and long
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessor.java b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessor.java
index e8dcb3ad2fe..ba233f3f364 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessor.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessor.java
@@ -15,7 +15,7 @@ public class BagItFileHandlerPostProcessor {
 
     private static final Logger logger = Logger.getLogger(BagItFileHandlerPostProcessor.class.getCanonicalName());
 
-    public static final List<String> FILES_TO_IGNORE = Arrays.asList("__", "._", ".DS_Store", "._.DS_Store");
+    public static final List<String> FILES_TO_IGNORE = Arrays.asList("__", "._", ".DS_Store");
 
     public List<DataFile> process(List<DataFile> items) {
         if(items == null) {
@@ -26,7 +26,11 @@ public List<DataFile> process(List<DataFile> items) {
 
         for(DataFile item: items) {
             String fileName = item.getCurrentName();
-            if(FILES_TO_IGNORE.contains(fileName)) {
+            if(fileName == null || fileName.isEmpty()) {
+                continue;
+            }
+
+            if(FILES_TO_IGNORE.stream().anyMatch(fileName::startsWith)) {
                 logger.fine(String.format("action=BagItFileHandlerPostProcessor result=ignore-entry file=%s", fileName));
                 continue;
             }
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessorTest.java b/src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessorTest.java
index 7a98b6573a2..f8c7565af7c 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessorTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/file/BagItFileHandlerPostProcessorTest.java
@@ -29,11 +29,42 @@ public void should_return_null_when_datafiles_are_null() throws Exception {
     @Test
     public void should_ignore_mac_control_files() throws Exception {
         String bagEntry = UUID.randomUUID().toString();
-        String macFile01 = "__";
-        String macFile02 = "._";
         String macFile03 = ".DS_Store";
         String macFile04 = "._.DS_Store";
-        List<DataFile> dataFiles = createDataFiles(bagEntry, macFile01, macFile02, macFile03, macFile04);
+        List<DataFile> dataFiles = createDataFiles(bagEntry, macFile03, macFile04);
+
+        List<DataFile> result = target.process(dataFiles);
+        MatcherAssert.assertThat(result.size(), Matchers.is(1));
+        MatcherAssert.assertThat(result.get(0).getCurrentName(), Matchers.is(bagEntry));
+    }
+
+    @Test
+    public void should_ignore_empty_files() throws Exception {
+        String bagEntry = UUID.randomUUID().toString();
+        String fileToIgnore = "";
+        List<DataFile> dataFiles = createDataFiles(bagEntry, fileToIgnore);
+
+        List<DataFile> result = target.process(dataFiles);
+        MatcherAssert.assertThat(result.size(), Matchers.is(1));
+        MatcherAssert.assertThat(result.get(0).getCurrentName(), Matchers.is(bagEntry));
+    }
+
+    @Test
+    public void should_ignore_files_that_start_with_dot_underscore() throws Exception {
+        String bagEntry = UUID.randomUUID().toString();
+        String fileToIgnore = "._FileNameToIgnore";
+        List<DataFile> dataFiles = createDataFiles(bagEntry, fileToIgnore);
+
+        List<DataFile> result = target.process(dataFiles);
+        MatcherAssert.assertThat(result.size(), Matchers.is(1));
+        MatcherAssert.assertThat(result.get(0).getCurrentName(), Matchers.is(bagEntry));
+    }
+
+    @Test
+    public void should_ignore_files_that_start_with_double_underscore() throws Exception {
+        String bagEntry = UUID.randomUUID().toString();
+        String fileToIgnore = "__FileNameToIgnore";
+        List<DataFile> dataFiles = createDataFiles(bagEntry, fileToIgnore);
 
         List<DataFile> result = target.process(dataFiles);
         MatcherAssert.assertThat(result.size(), Matchers.is(1));