Merge pull request #1 from IQSS/develop
merge iqss develop into mdm develop
mdmADA authored Aug 4, 2020
2 parents 2613d4e + 2b6be39 commit e3464bd
Showing 42 changed files with 1,227 additions and 432 deletions.
1 change: 1 addition & 0 deletions doc/release-notes/4813-allow-duplicate-files.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
We should note that duplicate files are now allowed, and installations may want to contact their users now that this is available. Point to the rules in the Guides.
6 changes: 6 additions & 0 deletions doc/release-notes/6918-publishing-lock.md
@@ -0,0 +1,6 @@
The setting :PIDAsynchRegFileCount is deprecated as of v5.0.

It used to specify how many datafiles a dataset needed before a lock was
added during publishing. As of v5.0, all datasets are locked for the
duration of the publishing process, and the setting will be ignored if
present.
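
If you want to tidy up, you can check whether the deprecated setting is still present and remove it. A sketch, assuming the `/api/admin/settings` endpoint and the response shape shown in the comment (both taken as illustrative, not authoritative):

```shell
# Sample of the JSON returned by GET /api/admin/settings (shape assumed for illustration).
settings='{"status":"OK","data":{":PIDAsynchRegFileCount":"10",":FilePIDsEnabled":"true"}}'

# The setting is ignored as of v5.0, so it can safely be removed if found:
if echo "$settings" | grep -q ':PIDAsynchRegFileCount'; then
  echo "deprecated setting present; remove with:"
  echo 'curl -X DELETE http://localhost:8080/api/admin/settings/:PIDAsynchRegFileCount'
fi
```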
82 changes: 82 additions & 0 deletions doc/release-notes/6961-payara-upgrade.md
@@ -0,0 +1,82 @@
Upgrade Dataverse from Glassfish 4.1 to Payara 5
================================================

The instructions below describe an upgrade procedure based on moving an existing glassfish4 domain directory under Payara. We recommend this method, rather than setting up a brand-new Payara domain using the installer, because it appears to be the easiest way to recreate your current configuration and preserve all your data.

Download Payara, v5.2020.2 as of this writing:

# curl -L -O https://github.com/payara/Payara/releases/download/payara-server-5.2020.2/payara-5.2020.2.zip
# sha256sum payara-5.2020.2.zip
1f5f7ea30901b1b4c7bcdfa5591881a700c9b7e2022ae3894192ba97eb83cc3e
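
To make the checksum comparison explicit rather than eyeballing the `sha256sum` output, a small helper can be used; this is only a convenience sketch:

```shell
# verify_sha256 FILE EXPECTED_HASH - prints "checksum OK" or "checksum MISMATCH".
verify_sha256() {
  actual=$(sha256sum "$1" | awk '{print $1}')
  if [ "$actual" = "$2" ]; then
    echo "checksum OK"
  else
    echo "checksum MISMATCH"
  fi
}

# Usage against the real download:
# verify_sha256 payara-5.2020.2.zip 1f5f7ea30901b1b4c7bcdfa5591881a700c9b7e2022ae3894192ba97eb83cc3e
```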

Unzip it somewhere (/usr/local is a safe bet)

# sudo unzip payara-5.2020.2.zip -d /usr/local/

Copy the Postgres driver to /usr/local/payara5/glassfish/lib

# sudo cp /usr/local/glassfish4/glassfish/lib/postgresql-42.2.9.jar /usr/local/payara5/glassfish/lib/

Move payara5/glassfish/domains/domain1 out of the way

# sudo mv /usr/local/payara5/glassfish/domains/domain1 /usr/local/payara5/glassfish/domains/domain1.orig

Undeploy the Dataverse web application (if deployed; version 4.20 is assumed in the example below)

# sudo /usr/local/glassfish4/bin/asadmin list-applications
# sudo /usr/local/glassfish4/bin/asadmin undeploy dataverse-4.20

Stop Glassfish; copy domain1 to Payara

# sudo /usr/local/glassfish4/bin/asadmin stop-domain
# sudo cp -ar /usr/local/glassfish4/glassfish/domains/domain1 /usr/local/payara5/glassfish/domains/

Remove the Glassfish cache directories

# sudo rm -rf /usr/local/payara5/glassfish/domains/domain1/generated/
# sudo rm -rf /usr/local/payara5/glassfish/domains/domain1/osgi-cache/

In domain.xml:
==============

Replace the -XX:PermSize and -XX:MaxPermSize JVM options with -XX:MetaspaceSize and -XX:MaxMetaspaceSize.

<jvm-options>-XX:MetaspaceSize=256m</jvm-options>
<jvm-options>-XX:MaxMetaspaceSize=512m</jvm-options>

Set both Xmx and Xms at startup to avoid runtime re-allocation. Your Xmx value should likely be higher:

<jvm-options>-Xmx2048m</jvm-options>
<jvm-options>-Xms2048m</jvm-options>

Add the below JVM options beneath the -Ddataverse settings:

<jvm-options>-Dfish.payara.classloading.delegate=false</jvm-options>
<jvm-options>-XX:+UseG1GC</jvm-options>
<jvm-options>-XX:+UseStringDeduplication</jvm-options>
<jvm-options>-XX:+DisableExplicitGC</jvm-options>

Change any full pathnames /usr/local/glassfish4/... to /usr/local/payara5/... or whatever it is in your case. (Specifically check the -Ddataverse.files.directory and -Ddataverse.files.file.directory JVM options)

In domain1/config/jhove.conf, change the hard-coded /usr/local/glassfish4 path, as above.
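
Both files can be updated with a single `sed` substitution. A sketch that keeps a `.bak` copy so you can diff before deleting it (the paths are examples — adjust them to your installation):

```shell
# rewrite_gf4_paths FILE - rewrite old Glassfish paths in place, keeping FILE.bak.
rewrite_gf4_paths() {
  sed -i.bak 's|/usr/local/glassfish4|/usr/local/payara5|g' "$1"
}

# For example:
# rewrite_gf4_paths /usr/local/payara5/glassfish/domains/domain1/config/domain.xml
# rewrite_gf4_paths /usr/local/payara5/glassfish/domains/domain1/config/jhove.conf
```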

(Optional): If you renamed your service account from glassfish to payara or appserver, update the ownership permissions. The Installation Guide recommends a service account of `dataverse`:

# sudo chown -R dataverse /usr/local/payara5/glassfish/domains/domain1
# sudo chown -R dataverse /usr/local/payara5/glassfish/lib

You will also need to check that the service account has write permission on the files directory, if it is located outside the old Glassfish domain, and that it has the correct AWS credentials if you are using S3 for storage.
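
A quick way to check writability is a small test run as the service account; a sketch (the `dataverse` account name follows the Installation Guide recommendation above, and the files path is a placeholder):

```shell
# check_writable DIR - report whether the current user can write to DIR.
check_writable() {
  if [ -w "$1" ]; then
    echo "writable: $1"
  else
    echo "NOT writable: $1"
  fi
}

# Run the same check as the service account, e.g.:
# sudo -u dataverse bash -c '[ -w /path/to/files ] && echo writable || echo "NOT writable"'
```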

Finally, start Payara:

# sudo -u dataverse /usr/local/payara5/bin/asadmin start-domain

Deploy the Dataverse 5 warfile:

# sudo -u dataverse /usr/local/payara5/bin/asadmin deploy /path/to/dataverse-5.0.war

Then restart Payara:

# sudo -u dataverse /usr/local/payara5/bin/asadmin stop-domain
# sudo -u dataverse /usr/local/payara5/bin/asadmin start-domain
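
As a smoke test after the restart, you can ask the running application for its version via `/api/info/version` (documented in the API Guide). A helper to pull the version string out of the JSON response — the response shape shown in the comment is an assumption for illustration:

```shell
# parse_version - extract the version from /api/info/version JSON,
# e.g. {"status":"OK","data":{"version":"5.0","build":"..."}}
parse_version() {
  sed -n 's/.*"version":"\([^"]*\)".*/\1/p'
}

# Usage:
# curl -s http://localhost:8080/api/info/version | parse_version
```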

20 changes: 3 additions & 17 deletions doc/sphinx-guides/source/installation/config.rst
@@ -186,7 +186,6 @@ Here are the configuration options for DOIs:
- :ref:`:IdentifierGenerationStyle <:IdentifierGenerationStyle>` (optional)
- :ref:`:DataFilePIDFormat <:DataFilePIDFormat>` (optional)
- :ref:`:FilePIDsEnabled <:FilePIDsEnabled>` (optional, defaults to true)
- :ref:`:PIDAsynchRegFileCount <:PIDAsynchRegFileCount>` (optional, defaults to 10)

Configuring Dataverse for Handles
+++++++++++++++++++++++++++++++++
@@ -1446,24 +1445,13 @@ Note that in either case, when using the ``sequentialNumber`` option, datasets a
:FilePIDsEnabled
++++++++++++++++

Toggles publishing of file-based PIDs for the entire installation. By default this setting is absent and Dataverse assumes it to be true.
Toggles publishing of file-based PIDs for the entire installation. By default this setting is absent and Dataverse assumes it to be true. If enabled, the registration will be performed asynchronously (in the background) during publishing of a dataset.

If you don't want to register file-based PIDs for your installation, set:

``curl -X PUT -d 'false' http://localhost:8080/api/admin/settings/:FilePIDsEnabled``

Note: File-level PID registration was added in 4.9 and is required until version 4.9.3.

Note: The dataset will be locked, and the registration will be performed asynchronously, when there are more than N files in the dataset, where N is configured by the database setting ``:PIDAsynchRegFileCount`` (default: 10).

.. _:PIDAsynchRegFileCount:

:PIDAsynchRegFileCount
++++++++++++++++++++++

Configures the number of files in the dataset to warrant performing the registration of persistent identifiers (section above) and/or file validation asynchronously during publishing. The setting is optional, and the default value is 10.

``curl -X PUT -d '100' http://localhost:8080/api/admin/settings/:PIDAsynchRegFileCount``
Note: File-level PID registration was added in 4.9; it could not be disabled until version 4.9.3.

.. _:IndependentHandleService:

@@ -1480,14 +1468,12 @@ By default this setting is absent and Dataverse assumes it to be false.
:FileValidationOnPublishEnabled
+++++++++++++++++++++++++++++++

Toggles validation of the physical files in the dataset when it's published, by recalculating the checksums and comparing against the values stored in the DataFile table. By default this setting is absent and Dataverse assumes it to be true.
Toggles validation of the physical files in the dataset when it's published, by recalculating the checksums and comparing against the values stored in the DataFile table. By default this setting is absent and Dataverse assumes it to be true. If enabled, the validation will be performed asynchronously, similarly to how we handle assigning persistent identifiers to datafiles, with the dataset locked for the duration of the publishing process.

If you don't want the datafiles to be validated on publish, set:

``curl -X PUT -d 'false' http://localhost:8080/api/admin/settings/:FileValidationOnPublishEnabled``

Note: The dataset will be locked, and the validation will be performed asynchronously, similarly to how we handle assigning persistent identifiers to datafiles, when there are more than N files in the dataset, where N is configured by the database setting ``:PIDAsynchRegFileCount`` (default: 10).


:ApplicationTermsOfUse
++++++++++++++++++++++
17 changes: 15 additions & 2 deletions doc/sphinx-guides/source/user/dataset-management.rst
@@ -73,7 +73,6 @@ You can upload files to a dataset while first creating that dataset. You can als

Certain file types in Dataverse are supported by additional functionality, which can include downloading in different formats, previews, file-level metadata preservation, file-level data citation with UNFs, and exploration through data visualization and analysis. See the :ref:`File Handling <file-handling>` section of this page for more information.


HTTP Upload
-----------

@@ -147,6 +146,20 @@ File Handling

Certain file types in Dataverse are supported by additional functionality, which can include downloading in different formats, previews, file-level metadata preservation, file-level data citation; and exploration through data visualization and analysis. See the sections below for information about special functionality for specific file types.

.. _duplicate-files:

Duplicate Files
===============

Beginning with Dataverse 5.0, the way Dataverse handles duplicate files (filenames and checksums) is changing to be more flexible. Specifically:

- Files with the same checksum can be included in a dataset, even if the files are in the same directory.
- Files with the same filename can be included in a dataset as long as the files are in different directories.
- If a user uploads a file to a directory where a file already exists with that directory/filename combination, Dataverse will adjust the file path and name by adding "-1" or "-2" as applicable. This change will be visible in the list of files being uploaded.
- If the directory or name of an existing or newly uploaded file is edited in such a way that would create a directory/filename combination that already exists, Dataverse will display an error.
- If a user attempts to replace a file with another file that has the same checksum, an error message will be displayed and the file will not be replaced.
- If a user attempts to replace a file with a file that has the same checksum as a different file in the dataset, a warning will be displayed.
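
The "-1"/"-2" renaming behavior described above can be illustrated with a small shell analogy (this is only an illustration — the actual logic lives in Dataverse's Java code, and the helper below assumes filenames that have an extension):

```shell
# next_free_name NAME - return NAME unchanged, or with -1, -2, ... inserted
# before the extension, choosing the first candidate that does not already exist.
next_free_name() {
  base="${1%.*}"; ext="${1##*.}"
  candidate="$1"; n=1
  while [ -e "$candidate" ]; do
    candidate="${base}-${n}.${ext}"
    n=$((n + 1))
  done
  echo "$candidate"
}
```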

File Previews
-------------

@@ -268,7 +281,7 @@ Variable Metadata can be edited directly through an API call (:ref:`API Guide: E
File Path
---------

The File Path metadata field is Dataverse's way of representing a file's location in a folder structure. When a user uploads a .zip file containing a folder structure, Dataverse automatically fills in the File Path information for each file contained in the .zip. If a user downloads the full dataset or a selection of files from it, they will receive a folder structure with each file positioned according to its File Path.
The File Path metadata field is Dataverse's way of representing a file's location in a folder structure. When a user uploads a .zip file containing a folder structure, Dataverse automatically fills in the File Path information for each file contained in the .zip. If a user downloads the full dataset or a selection of files from it, they will receive a folder structure with each file positioned according to its File Path. Only one file with a given path and name may exist in a dataset. Editing a file to give it the same path and name as another file already existing in the dataset will cause an error.

A file's File Path can be manually added or edited on the Edit Files page. Changing a file's File Path will change its location in the folder structure that is created when a user downloads the full dataset or a selection of files from it.

31 changes: 31 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DataFile.java
@@ -254,6 +254,37 @@ public boolean isDeleted() {
public void setDeleted(boolean deleted) {
this.deleted = deleted;
}

/*
For use during file upload so that the user may delete
files that have already been uploaded to the current dataset version
*/

@Transient
private boolean markedAsDuplicate;

public boolean isMarkedAsDuplicate() {
return markedAsDuplicate;
}

public void setMarkedAsDuplicate(boolean markedAsDuplicate) {
this.markedAsDuplicate = markedAsDuplicate;
}

@Transient
private String duplicateFilename;

public String getDuplicateFilename() {
return duplicateFilename;
}

public void setDuplicateFilename(String duplicateFilename) {
this.duplicateFilename = duplicateFilename;
}

/**
* All constructors should use this method
11 changes: 10 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
@@ -2128,6 +2128,15 @@ private void displayLockInfo(Dataset dataset) {
BundleUtil.getStringFromBundle("dataset.locked.ingest.message"));
lockedDueToIngestVar = true;
}

// With DataCite, we try to reserve the DOI when the dataset is created. Sometimes this
// fails because DataCite is down. We show the message below to set expectations that the
// "Publish" button won't work until the DOI has been reserved using the "Reserve PID" API.
if (settingsWrapper.isDataCiteInstallation() && dataset.getGlobalIdCreateTime() == null && editMode != EditMode.CREATE) {
JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("dataset.locked.pidNotReserved.message"),
BundleUtil.getStringFromBundle("dataset.locked.pidNotReserved.message.details"));
}

}

private Boolean fileTreeViewRequired = null;
@@ -2649,7 +2658,7 @@ private String releaseDataset(boolean minor) {
} else {
JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataset.message.only.authenticatedUsers"));
}
return returnToDatasetOnly();
return returnToDraftVersion();
}

@Deprecated
4 changes: 3 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/DataverseSession.java
@@ -6,6 +6,7 @@
import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean;
import edu.harvard.iq.dataverse.authorization.users.GuestUser;
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.util.SessionUtil;
import edu.harvard.iq.dataverse.util.SystemConfig;
import java.io.IOException;
import java.io.Serializable;
@@ -61,7 +62,8 @@ public void setUser(User aUser) {
logSvc.log(
new ActionLogRecord(ActionLogRecord.ActionType.SessionManagement,(aUser==null) ? "logout" : "login")
.setUserIdentifier((aUser!=null) ? aUser.getIdentifier() : (user!=null ? user.getIdentifier() : "") ));

//#3254 - change session id when user changes
SessionUtil.changeSessionId((HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest());
this.user = aUser;
}
