From 299b5b7377a7cd9fc882d8e016f2001bcd9ec5ab Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 26 Feb 2020 12:20:27 -0500 Subject: [PATCH 1/4] Added some warnings and TODOs to the code that handles package files, about the need to address how they handle storage locations. (#6666) --- src/main/java/edu/harvard/iq/dataverse/Dataset.java | 3 ++- .../importer/filesystem/FileRecordJobListener.java | 10 ++++++++++ .../jobs/importer/filesystem/FileRecordReader.java | 10 ++++++++++ .../command/impl/ImportFromFileSystemCommand.java | 12 ++++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 9e71ff56085..92ebc82df9a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -504,7 +504,8 @@ private Collection getCategoryNames() { } } - /*Only used with packageFiles after the implementation of multi-store in #6488 + /* Only used with packageFiles after the implementation of multi-store in #6488 + * DO NOT USE THIS METHOD FOR ANY OTHER PURPOSES - it's @Deprecated for a reason. * */ @Deprecated diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index 4a6f48f1600..d6713a42e12 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -439,6 +439,16 @@ private void loadChecksumManifest() { + SEP + dataset.getIdentifier() + SEP + uploadFolder + SEP + manifest; + // TODO: + // The above goes directly to the filesystem directory configured by the + // old "dataverse.files.directory" JVM option (otherwise used for temp + // files only, after the Multistore implementatoin (#6488). + // We probably want package files to be able to use specific stores instead. + // More importantly perhaps, the approach above does not take into account + // if the dataset may have an AlternativePersistentIdentifier, that may be + // designated isStorageLocationDesignator() - i.e., if a different identifer + // needs to be used to name the storage directory, instead of the main/current + // persistent identifier above. getJobLogger().log(Level.INFO, "Reading checksum manifest: " + manifestAbsolutePath); Scanner scanner = null; try { diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index 023b0876be4..724ef5795ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -99,6 +99,16 @@ public void open(Serializable checkpoint) throws Exception { directory = new File(System.getProperty("dataverse.files.directory") + SEP + dataset.getAuthority() + SEP + dataset.getIdentifier() + SEP + uploadFolder); + // TODO: + // The above goes directly to the filesystem directory configured by the + // old "dataverse.files.directory" JVM option (otherwise used for temp + // files only, after the Multistore implementatoin (#6488). + // We probably want package files to be able to use specific stores instead. + // More importantly perhaps, the approach above does not take into account + // if the dataset may have an AlternativePersistentIdentifier, that may be + // designated isStorageLocationDesignator() - i.e., if a different identifer + // needs to be used to name the storage directory, instead of the main/current + // persistent identifier above. getJobLogger().log(Level.INFO, "Reading dataset directory: " + directory.getAbsolutePath() + " (excluding: " + excludes + ")"); if (isValidDirectory(directory)) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java index fc0d9362545..5f38cd8d224 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java @@ -71,6 +71,16 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { } File directory = new File(System.getProperty("dataverse.files.directory") + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier()); + // TODO: + // The above goes directly to the filesystem directory configured by the + // old "dataverse.files.directory" JVM option (otherwise used for temp + // files only, after the Multistore implementatoin (#6488). + // We probably want package files to be able to use specific stores instead. + // More importantly perhaps, the approach above does not take into account + // if the dataset may have an AlternativePersistentIdentifier, that may be + // designated isStorageLocationDesignator() - i.e., if a different identifer + // needs to be used to name the storage directory, instead of the main/current + // persistent identifier above. if (!isValidDirectory(directory)) { String error = "Dataset directory is invalid. " + directory; logger.info(error); @@ -86,6 +96,8 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { File uploadDirectory = new File(System.getProperty("dataverse.files.directory") + File.separator + dataset.getAuthority() + File.separator + dataset.getIdentifier() + File.separator + uploadFolder); + // TODO: + // see the comment above. if (!isValidDirectory(uploadDirectory)) { String error = "Upload folder is not a valid directory."; logger.info(error); From 90166ddca3e288ff354f22e4dd4729525af21145 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 26 Feb 2020 12:28:44 -0500 Subject: [PATCH 2/4] typo --- .../batch/jobs/importer/filesystem/FileRecordJobListener.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java index d6713a42e12..6b82a665c17 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordJobListener.java @@ -442,7 +442,7 @@ private void loadChecksumManifest() { // TODO: // The above goes directly to the filesystem directory configured by the // old "dataverse.files.directory" JVM option (otherwise used for temp - // files only, after the Multistore implementatoin (#6488). + // files only, after the Multistore implementation (#6488). // We probably want package files to be able to use specific stores instead. // More importantly perhaps, the approach above does not take into account // if the dataset may have an AlternativePersistentIdentifier, that may be From 688c7a69fc0750419ddf6e86d11d03928ebf59c2 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 26 Feb 2020 12:29:15 -0500 Subject: [PATCH 3/4] typo --- .../batch/jobs/importer/filesystem/FileRecordReader.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java index 724ef5795ff..b3d3a7107a6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordReader.java @@ -102,7 +102,7 @@ public void open(Serializable checkpoint) throws Exception { // TODO: // The above goes directly to the filesystem directory configured by the // old "dataverse.files.directory" JVM option (otherwise used for temp - // files only, after the Multistore implementatoin (#6488). + // files only, after the Multistore implementation (#6488). // We probably want package files to be able to use specific stores instead. // More importantly perhaps, the approach above does not take into account // if the dataset may have an AlternativePersistentIdentifier, that may be From 55ad201700bc4d03e1fcc38c0d85a5b40fb0d127 Mon Sep 17 00:00:00 2001 From: qqmyers Date: Wed, 26 Feb 2020 12:29:49 -0500 Subject: [PATCH 4/4] same typo --- .../engine/command/impl/ImportFromFileSystemCommand.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java index 5f38cd8d224..64beba82450 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportFromFileSystemCommand.java @@ -74,7 +74,7 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { // TODO: // The above goes directly to the filesystem directory configured by the // old "dataverse.files.directory" JVM option (otherwise used for temp - // files only, after the Multistore implementatoin (#6488). + // files only, after the Multistore implementation (#6488). // We probably want package files to be able to use specific stores instead. // More importantly perhaps, the approach above does not take into account // if the dataset may have an AlternativePersistentIdentifier, that may be