Skip to content

Commit

Permalink
Merge branch 'develop' into 8740-file-recognition-based-on-filename I…
Browse files Browse the repository at this point in the history
  • Loading branch information
pdurbin committed Jul 18, 2022
2 parents 734f467 + 86f69bd commit 5cef645
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 8 deletions.
6 changes: 6 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/Dataverse.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import edu.harvard.iq.dataverse.dataaccess.DataAccess;
import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch;
import edu.harvard.iq.dataverse.util.BundleUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
Expand Down Expand Up @@ -765,4 +767,8 @@ public boolean isAncestorOf( DvObject other ) {
}
return false;
}

/**
 * Builds the URL of this collection's page on the local installation.
 *
 * @return the site URL reported by {@code SystemConfig}, followed by
 *         {@code /dataverse/} and this collection's alias
 */
public String getLocalURL() {
    String siteUrl = SystemConfig.getDataverseSiteUrlStatic();
    return siteUrl + "/dataverse/" + this.getAlias();
}
}
23 changes: 23 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/GlobalId.java
Original file line number Diff line number Diff line change
Expand Up @@ -254,4 +254,27 @@ public static boolean verifyImportCharacters(String pidParam) {

return m.matches();
}

/**
 * Convenience method to get the internal form of a PID string when it may be
 * in the https:// or http:// resolver-URL form.
 * <p>
 * Only a resolver URL at the very start of the string is rewritten: it is
 * removed and replaced by the matching protocol followed by a colon. The rest
 * of the string is left untouched, even if it happens to contain the resolver
 * URL text again. A string that starts with none of the known resolver URLs is
 * returned unchanged.
 * <p>
 * TODO: refactor this class to allow creating a GlobalId from any form (which
 * assures it has valid syntax) and then have methods to get the form you want.
 *
 * @param pidUrlString a string assumed to be a valid PID in some form; must
 *                     not be {@code null}
 * @return the internal form as a String
 */
public static String getInternalFormOfPID(String pidUrlString) {
    if (pidUrlString.startsWith(GlobalId.DOI_RESOLVER_URL)) {
        return (GlobalId.DOI_PROTOCOL + ":") + pidUrlString.substring(GlobalId.DOI_RESOLVER_URL.length());
    }
    if (pidUrlString.startsWith(GlobalId.HDL_RESOLVER_URL)) {
        return (GlobalId.HDL_PROTOCOL + ":") + pidUrlString.substring(GlobalId.HDL_RESOLVER_URL.length());
    }
    if (pidUrlString.startsWith(GlobalId.HTTP_DOI_RESOLVER_URL)) {
        return (GlobalId.DOI_PROTOCOL + ":") + pidUrlString.substring(GlobalId.HTTP_DOI_RESOLVER_URL.length());
    }
    if (pidUrlString.startsWith(GlobalId.HTTP_HDL_RESOLVER_URL)) {
        return (GlobalId.HDL_PROTOCOL + ":") + pidUrlString.substring(GlobalId.HTTP_HDL_RESOLVER_URL.length());
    }
    // Not in any known resolver-URL form: hand the input back as-is.
    return pidUrlString;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@

import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.DataFile.ChecksumType;
import edu.harvard.iq.dataverse.GlobalId;
import edu.harvard.iq.dataverse.util.json.JsonLDTerm;

public class BagGenerator {
Expand Down Expand Up @@ -204,7 +205,9 @@ public boolean generateBag(OutputStream outputStream) throws Exception {
// The oremapObject is javax.json.JsonObject and we need com.google.gson.JsonObject for the aggregation object
aggregation = (JsonObject) new JsonParser().parse(oremapObject.getJsonObject(JsonLDTerm.ore("describes").getLabel()).toString());

bagID = aggregation.get("@id").getAsString() + "v."
String pidUrlString = aggregation.get("@id").getAsString();
String pidString=GlobalId.getInternalFormOfPID(pidUrlString);
bagID = pidString + "v."
+ aggregation.get(JsonLDTerm.schemaOrg("version").getLabel()).getAsString();

logger.info("Generating Bag: " + bagID);
Expand Down
18 changes: 16 additions & 2 deletions src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DvObjectContainer;
import edu.harvard.iq.dataverse.FileMetadata;
import edu.harvard.iq.dataverse.TermsOfUseAndAccess;
Expand Down Expand Up @@ -86,7 +87,7 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except
localContext.putIfAbsent(JsonLDNamespace.schema.getPrefix(), JsonLDNamespace.schema.getUrl());

Dataset dataset = version.getDataset();
String id = dataset.getGlobalId().asString();
String id = dataset.getGlobalId().toURL().toExternalForm();
JsonArrayBuilder fileArray = Json.createArrayBuilder();
// The map describes an aggregation
JsonObjectBuilder aggBuilder = Json.createObjectBuilder();
Expand Down Expand Up @@ -214,7 +215,9 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except
}

aggBuilder.add(JsonLDTerm.schemaOrg("includedInDataCatalog").getLabel(),
BrandingUtil.getRootDataverseCollectionName());
BrandingUtil.getInstallationBrandName());

aggBuilder.add(JsonLDTerm.schemaOrg("isPartOf").getLabel(), getDataverseDescription(dataset.getOwner()));
String mdl = dataset.getMetadataLanguage();
if(!mdl.equals(DvObjectContainer.UNDEFINED_METADATA_LANGUAGE_CODE)) {
aggBuilder.add(JsonLDTerm.schemaOrg("inLanguage").getLabel(), mdl);
Expand Down Expand Up @@ -320,6 +323,17 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) throws Except
}
}

/**
 * Builds a JSON description of a collection: its name, its local URL as
 * "@id", its description when one is set, and - when the collection has an
 * owner - a nested "isPartOf" entry describing that owner in the same way.
 *
 * @param dv the collection to describe
 * @return a builder holding the description of {@code dv} and, recursively,
 *         of its owners
 */
private JsonObjectBuilder getDataverseDescription(Dataverse dv) {
    // Schema.org is already in the local context, so no context updates are
    // needed as long as only schemaOrg terms and "@id" are used here.
    JsonObjectBuilder description = Json.createObjectBuilder();
    description.add(JsonLDTerm.schemaOrg("name").getLabel(), dv.getCurrentName());
    description.add("@id", dv.getLocalURL());
    addIfNotNull(description, JsonLDTerm.schemaOrg("description"), dv.getDescription());

    Dataverse parent = dv.getOwner();
    if (parent == null) {
        return description;
    }
    description.add(JsonLDTerm.schemaOrg("isPartOf").getLabel(), getDataverseDescription(parent));
    return description;
}

/*
* Simple methods to only add an entry to JSON if the value of the term is
* non-null. Methods created for string, JsonValue, boolean, and long
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class BagItFileHandlerPostProcessor {

private static final Logger logger = Logger.getLogger(BagItFileHandlerPostProcessor.class.getCanonicalName());

public static final List<String> FILES_TO_IGNORE = Arrays.asList("__", "._", ".DS_Store", "._.DS_Store");
public static final List<String> FILES_TO_IGNORE = Arrays.asList("__", "._", ".DS_Store");

public List<DataFile> process(List<DataFile> items) {
if(items == null) {
Expand All @@ -26,7 +26,11 @@ public List<DataFile> process(List<DataFile> items) {

for(DataFile item: items) {
String fileName = item.getCurrentName();
if(FILES_TO_IGNORE.contains(fileName)) {
if(fileName == null || fileName.isEmpty()) {
continue;
}

if(FILES_TO_IGNORE.stream().anyMatch(prefix -> fileName.startsWith(prefix))) {
logger.fine(String.format("action=BagItFileHandlerPostProcessor result=ignore-entry file=%s", fileName));
continue;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,43 @@ public void should_return_null_when_datafiles_are_null() throws Exception {
@Test
public void should_ignore_mac_control_files() throws Exception {
String bagEntry = UUID.randomUUID().toString();
String macFile01 = "__";
String macFile02 = "._";
String macFile03 = ".DS_Store";
String macFile04 = "._.DS_Store";
List<DataFile> dataFiles = createDataFiles(bagEntry, macFile01, macFile02, macFile03, macFile04);
List<DataFile> dataFiles = createDataFiles(bagEntry, macFile03, macFile04);

List<DataFile> result = target.process(dataFiles);
MatcherAssert.assertThat(result.size(), Matchers.is(1));
MatcherAssert.assertThat(result.get(0).getCurrentName(), Matchers.is(bagEntry));
}

// An entry with an empty file name must be dropped; only the real bag entry survives.
@Test
public void should_ignore_empty_files() throws Exception {
    String expectedName = UUID.randomUUID().toString();
    String emptyName = "";
    List<DataFile> input = createDataFiles(expectedName, emptyName);

    List<DataFile> kept = target.process(input);

    MatcherAssert.assertThat(kept.size(), Matchers.is(1));
    DataFile survivor = kept.get(0);
    MatcherAssert.assertThat(survivor.getCurrentName(), Matchers.is(expectedName));
}

// A file whose name starts with "._" must be dropped; only the real bag entry survives.
@Test
public void should_ignore_files_that_start_with_dot_underscore() throws Exception {
    String expectedName = UUID.randomUUID().toString();
    String dotUnderscoreName = "._FileNameToIgnore";
    List<DataFile> input = createDataFiles(expectedName, dotUnderscoreName);

    List<DataFile> kept = target.process(input);

    MatcherAssert.assertThat(kept.size(), Matchers.is(1));
    DataFile survivor = kept.get(0);
    MatcherAssert.assertThat(survivor.getCurrentName(), Matchers.is(expectedName));
}

@Test
public void should_ignore_files_that_start_with_double_underscore() throws Exception {
String bagEntry = UUID.randomUUID().toString();
String fileToIgnore = "__FileNameToIgnore";
String validFile = "validName";
List<DataFile> dataFiles = createDataFiles(bagEntry, fileToIgnore);

List<DataFile> result = target.process(dataFiles);
MatcherAssert.assertThat(result.size(), Matchers.is(1));
Expand Down

0 comments on commit 5cef645

Please sign in to comment.