diff --git a/doc/release-notes/5042-add-mydata-doc-api.md b/doc/release-notes/5042-add-mydata-doc-api.md new file mode 100644 index 00000000000..5a77e266725 --- /dev/null +++ b/doc/release-notes/5042-add-mydata-doc-api.md @@ -0,0 +1,2 @@ +An API named 'MyData' is supported by Dataverse. Documentation describing its use has been added (PR #9596). +This API is used to get a list of only the objects (datasets, dataverses or datafiles) that an authenticated user can modify. diff --git a/doc/release-notes/8889-filepids-in-collections.md b/doc/release-notes/8889-filepids-in-collections.md new file mode 100644 index 00000000000..bc8aeea3b56 --- /dev/null +++ b/doc/release-notes/8889-filepids-in-collections.md @@ -0,0 +1,3 @@ +It is now possible to configure registering PIDs for files in individual collections. + +For example, registration of PIDs for files can be enabled in a specific collection when it is disabled instance-wide. Or it can be disabled in specific collections where it is enabled by default. See the [:FilePIDsEnabled](https://guides.dataverse.org/en/latest/installation/config.html#filepidsenabled) section of the Configuration guide for details. \ No newline at end of file diff --git a/doc/release-notes/9431-checksum-alg-in-direct-uploads.md b/doc/release-notes/9431-checksum-alg-in-direct-uploads.md new file mode 100644 index 00000000000..e754686f3f0 --- /dev/null +++ b/doc/release-notes/9431-checksum-alg-in-direct-uploads.md @@ -0,0 +1,4 @@ +Direct upload via the Dataverse UI will now support any algorithm configured via the :FileFixityChecksumAlgorithm setting. +External apps using the direct upload API can now query Dataverse to discover which algorithm should be used. + +Sites that use direct upload and/or dvwebloader and have been using an algorithm other than MD5 may want to use the /api/admin/updateHashValues call (see https://guides.dataverse.org/en/latest/installation/config.html?highlight=updatehashvalues#filefixitychecksumalgorithm) to replace any MD5 hashes on existing files. diff --git a/doc/release-notes/9480-h5web.md b/doc/release-notes/9480-h5web.md new file mode 100644 index 00000000000..97beff70e4a --- /dev/null +++ b/doc/release-notes/9480-h5web.md @@ -0,0 +1 @@ +A file previewer called H5Web is now available for exploring and visualizing NetCDF and HDF5 files. diff --git a/doc/release-notes/9558-async-indexing.md b/doc/release-notes/9558-async-indexing.md new file mode 100644 index 00000000000..a44eac1ff75 --- /dev/null +++ b/doc/release-notes/9558-async-indexing.md @@ -0,0 +1,3 @@ +Performance improvements, especially for large datasets containing thousands of files. +Uploading files one by one to a dataset is now much faster, allowing thousands of files to be uploaded in an acceptable timeframe. Not only file uploads but all edit operations on datasets containing many files are now faster. +Performance tweaks include indexing of the datasets in the background and optimizations that reduce the number of indexing operations needed. Furthermore, updates to the dataset no longer wait for ingest to finish. Ingest was already running in the background, but it took a lock, preventing updates to the dataset and degrading performance for datasets containing many files. 
\ No newline at end of file diff --git a/doc/release-notes/9588-datasets-api-extension.md b/doc/release-notes/9588-datasets-api-extension.md new file mode 100644 index 00000000000..f4fd6354d47 --- /dev/null +++ b/doc/release-notes/9588-datasets-api-extension.md @@ -0,0 +1,6 @@ +The following APIs have been added: + +- /api/datasets/summaryFieldNames +- /api/datasets/privateUrlDatasetVersion/{privateUrlToken} +- /api/datasets/privateUrlDatasetVersion/{privateUrlToken}/citation +- /api/datasets/{datasetId}/versions/{version}/citation diff --git a/doc/release-notes/9656-api-optional-dataset-params.md b/doc/release-notes/9656-api-optional-dataset-params.md new file mode 100644 index 00000000000..5d08f26386a --- /dev/null +++ b/doc/release-notes/9656-api-optional-dataset-params.md @@ -0,0 +1,5 @@ +The following fields are now available in the native JSON output: + +- alternativePersistentId +- publicationDate +- citationDate diff --git a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv index 4fc9849f7a5..6e0eb810b27 100644 --- a/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv +++ b/doc/sphinx-guides/source/_static/admin/dataverse-external-tools.tsv @@ -2,5 +2,5 @@ Tool Type Scope Description Data Explorer explore file A GUI which lists the variables in a tabular data file allowing searching, charting and cross tabulation analysis. See the README.md file at https://github.com/scholarsportal/dataverse-data-explorer-v2 for the instructions on adding Data Explorer to your Dataverse. Whole Tale explore dataset A platform for the creation of reproducible research packages that allows users to launch containerized interactive analysis environments based on popular tools such as Jupyter and RStudio. Using this integration, Dataverse users can launch Jupyter and RStudio environments to analyze published datasets. For more information, see the `Whole Tale User Guide `_. Binder explore dataset Binder allows you to spin up custom computing environments in the cloud (including Jupyter notebooks) with the files from your dataset. `Installation instructions `_ are in the Data Exploration Lab girder_ythub project. See also :ref:`binder`. -File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, GeoJSON, zip, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers +File Previewers explore file A set of tools that display the content of files - including audio, html, `Hypothes.is `_ annotations, images, PDF, text, video, tabular data, spreadsheets, GeoJSON, zip, HDF5, NetCDF, and NcML files - allowing them to be viewed without downloading the file. The previewers can be run directly from github.io, so the only required step is using the Dataverse API to register the ones you want to use. 
Documentation, including how to optionally brand the previewers, and an invitation to contribute through github are in the README.md file. Initial development was led by the Qualitative Data Repository and the spreasdheet previewer was added by the Social Sciences and Humanities Open Cloud (SSHOC) project. https://github.com/gdcc/dataverse-previewers Data Curation Tool configure file A GUI for curating data by adding labels, groups, weights and other details to assist with informed reuse. See the README.md file at https://github.com/scholarsportal/Dataverse-Data-Curation-Tool for the installation instructions. diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json index 47413c8a625..22dd6477cb4 100644 --- a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/dynamicDatasetTool.json @@ -14,14 +14,14 @@ { "locale":"{localeCode}" } - ], - "allowedApiCalls": [ - { - "name":"retrieveDatasetJson", - "httpMethod":"GET", - "urlTemplate":"/api/v1/datasets/{datasetId}", - "timeOut":10 - } - ] - } + ] + }, + "allowedApiCalls": [ + { + "name":"retrieveDatasetJson", + "httpMethod":"GET", + "urlTemplate":"/api/v1/datasets/{datasetId}", + "timeOut":10 + } + ] } diff --git a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json index 1c132576099..2b6a0b8e092 100644 --- a/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json +++ b/doc/sphinx-guides/source/_static/installation/files/root/external-tools/fabulousFileTool.json @@ -21,14 +21,14 @@ { "locale":"{localeCode}" } - ], - "allowedApiCalls": [ - { - "name":"retrieveDataFile", - "httpMethod":"GET", - "urlTemplate":"/api/v1/access/datafile/{fileId}", - "timeOut":270 - } ] - } + }, + "allowedApiCalls": [ + { + "name":"retrieveDataFile", + "httpMethod":"GET", + "urlTemplate":"/api/v1/access/datafile/{fileId}", + "timeOut":270 + } + ] } diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst index 7f32e8c2514..92e01578f71 100644 --- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst +++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst @@ -118,6 +118,28 @@ Creates a link between a dataset and a Dataverse collection (see the :ref:`datas curl -H "X-Dataverse-key: $API_TOKEN" -X PUT http://$SERVER/api/datasets/$linked-dataset-id/link/$linking-dataverse-alias +List Collections that are Linked from a Dataset +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Lists the link(s) created between a dataset and a Dataverse collection (see the :ref:`dataset-linking` section of the User Guide for more information). :: + + curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/datasets/$linked-dataset-id/links + +It returns a list in the following format: + +.. code-block:: json + + { + "status": "OK", + "data": { + "dataverses that link to dataset id 56782": [ + "crc990 (id 18802)" + ] + } + } + +.. 
_unlink-a-dataset: + Unlink a Dataset ^^^^^^^^^^^^^^^^ @@ -131,15 +153,32 @@ Mint a PID for a File That Does Not Have One In the following example, the database id of the file is 42:: export FILE_ID=42 - curl http://localhost:8080/api/admin/$FILE_ID/registerDataFile + curl "http://localhost:8080/api/admin/$FILE_ID/registerDataFile" + +Mint PIDs for all unregistered published files in the specified collection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Mint PIDs for Files That Do Not Have Them -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The following API will register the PIDs for all the yet unregistered published files in the datasets **directly within the collection** specified by its alias:: -If you have a large number of files, you might want to consider miniting PIDs for files individually using the ``registerDataFile`` endpoint above in a for loop, sleeping between each registration:: + curl "http://localhost:8080/api/admin/registerDataFiles/{collection_alias}" + +It will not attempt to register the datafiles in its sub-collections, so this call will need to be repeated on any sub-collections where files need to be registered as well. File-level PID registration must be enabled on the collection. (Note that it is possible to have it enabled for a specific collection, even when it is disabled for the Dataverse installation as a whole. See :ref:`collection-attributes-api` in the Native API Guide.) + +This API will sleep for 1 second between registration calls by default. A longer sleep interval can be specified with an optional ``sleep=`` parameter:: + + curl "http://localhost:8080/api/admin/registerDataFiles/{collection_alias}?sleep=5" + +Mint PIDs for ALL unregistered files in the database +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following API will attempt to register the PIDs for all the published files in your instance that do not yet have them:: curl http://localhost:8080/api/admin/registerDataFileAll +The application will attempt to sleep for 1 second between registration attempts so as not to overload your persistent identifier service provider. Note that if you have a large number of files that need to be registered in your Dataverse, you may want to consider minting file PIDs within individual collections, or even for individual files using the ``registerDataFiles`` and/or ``registerDataFile`` endpoints above in a loop, with a longer sleep interval between calls. + + + Mint a New DOI for a Dataset with a Handle ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 1480c66f8b6..058ba0d8725 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -95,6 +95,11 @@ Each of the three main sections own sets of properties: | displayName | Acts as a brief label for display related to this | Should be relatively brief. The limit is 256 character, | | | #metadataBlock. | but very long names might cause display problems. | +----------------+---------------------------------------------------------+---------------------------------------------------------+ +| displayFacet | Label displayed in the search area when this | Should be brief. Long names will cause display problems | +| | #metadataBlock is configured as a search facet | in the search area. | +| | for a collection. See | | +| | :ref:`the API `. 
| | ++----------------+---------------------------------------------------------+---------------------------------------------------------+ | blockURI | Associates the properties in a block with an external | The citation #metadataBlock has the blockURI | | | URI. | https://dataverse.org/schema/citation/ which assigns a | | | Properties will be assigned the | default global URI to terms such as | @@ -452,12 +457,16 @@ metadatablock.name=(the value of **name** property from #metadatablock) metadatablock.displayName=(the value of **displayName** property from #metadatablock) +metadatablock.displayFacet=(the value of **displayFacet** property from #metadatablock) + example: metadatablock.name=citation metadatablock.displayName=Citation Metadata +metadatablock.displayFacet=Citation + #datasetField (field) properties ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ datasetfieldtype.(the value of **name** property from #datasetField).title=(the value of **title** property from #datasetField) diff --git a/doc/sphinx-guides/source/api/getting-started.rst b/doc/sphinx-guides/source/api/getting-started.rst index c465b726421..544f0921bd7 100644 --- a/doc/sphinx-guides/source/api/getting-started.rst +++ b/doc/sphinx-guides/source/api/getting-started.rst @@ -52,6 +52,20 @@ If you ever want to check an environment variable, you can "echo" it like this: echo $SERVER_URL +With curl version 7.56.0 and higher, it is recommended to use --form-string with outer quotes rather than the -F flag without outer quotes. + +For example, the curl command parameter below might cause an error such as ``warning: garbage at end of field specification: ,"categories":["Data"]}``. + +.. code-block:: bash + + -F jsonData={\"description\":\"My description.\",\"categories\":[\"Data\"]} + +Instead, use --form-string with outer quotes. See https://github.com/curl/curl/issues/2022 + +.. code-block:: bash + + --form-string 'jsonData={"description":"My description.","categories":["Data"]}' + If you don't like curl, don't have curl, or want to use a different programming language, you are encouraged to check out the Python, Javascript, R, and Java options in the :doc:`client-libraries` section. .. _curl: https://curl.haxx.se diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 369e92ba129..ccf172eb84e 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -229,6 +229,8 @@ The fully expanded example above (without environment variables) looks like this Where :download:`dataverse-facets.json <../_static/api/dataverse-facets.json>` contains a JSON encoded list of metadata keys (e.g. ``["authorName","authorAffiliation"]``). +.. _metadata-block-facet-api: + List Metadata Block Facets Configured for a Dataverse Collection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -736,6 +738,24 @@ The fully expanded example above (without environment variables) looks like this curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx https://demo.dataverse.org/api/dataverses/root/guestbookResponses?guestbookId=1 -o myResponses.csv +.. _collection-attributes-api: + +Change Collection Attributes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
code-block:: + + curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/attribute/$ATTRIBUTE?value=$VALUE" + +The following attributes are supported: + +* ``alias`` Collection alias +* ``name`` Name +* ``description`` Description +* ``affiliation`` Affiliation +* ``filePIDsEnabled`` ("true" or "false") Enables or disables registration of file-level PIDs in datasets within the collection (overriding the instance-wide setting). + + Datasets -------- @@ -2173,6 +2193,50 @@ Signposting is not supported for draft dataset versions. curl -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/linkset?persistentId=$PERSISTENT_IDENTIFIER" +Get Dataset By Private URL Token +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PRIVATE_URL_TOKEN=a56444bc-7697-4711-8964-e0577f055fd2 + + curl "$SERVER_URL/api/datasets/privateUrlDatasetVersion/$PRIVATE_URL_TOKEN" + +Get Citation +~~~~~~~~~~~~ + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PERSISTENT_IDENTIFIER=doi:10.5072/FK2/YD5QDG + export VERSION=1.0 + + curl -H "Accept:application/json" "$SERVER_URL/api/datasets/:persistentId/versions/$VERSION/citation?persistentId=$PERSISTENT_IDENTIFIER" + +Get Citation by Private URL Token +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + export PRIVATE_URL_TOKEN=a56444bc-7697-4711-8964-e0577f055fd2 + + curl "$SERVER_URL/api/datasets/privateUrlDatasetVersion/$PRIVATE_URL_TOKEN/citation" + +.. _get-dataset-summary-field-names: + +Get Summary Field Names +~~~~~~~~~~~~~~~~~~~~~~~ + +See :ref:`:CustomDatasetSummaryFields` in the Installation Guide for how the list of dataset fields that summarize a dataset can be customized. Here's how to list them: + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/datasets/summaryFieldNames" + Files ----- @@ -2963,6 +3027,22 @@ The response is a JSON object described in the :doc:`/api/external-tools` sectio curl -H "X-Dataverse-key: $API_TOKEN" -H "Accept:application/json" "$SERVER_URL/api/files/$FILE_ID/metadata/$FILEMETADATA_ID/toolparams/$TOOL_ID +.. _get-fixity-algorithm: + +Get Fixity Algorithm +~~~~~~~~~~~~~~~~~~~~ + +This API call can be used to discover the configured fixity/checksum algorithm being used by a Dataverse installation (as configured by :ref:`:FileFixityChecksumAlgorithm`). +Currently, the possible values are MD5, SHA-1, SHA-256, and SHA-512. +This algorithm will be used when the Dataverse software manages a file upload and should be used by external clients uploading files to a Dataverse instance. (Existing files may or may not have checksums with this algorithm.) + +.. code-block:: bash + + export SERVER_URL=https://demo.dataverse.org + + curl "$SERVER_URL/api/files/fixityAlgorithm" + + Users Token Management ---------------------- @@ -4351,6 +4431,26 @@ It will report the specific files that have failed the validation. For example:: These are only available to super users. +.. _UpdateChecksums: + +Update Checksums To Use New Algorithm +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The fixity algorithm used on existing files can be changed by a superuser using this API call. An optional query parameter (num) can be used to limit the number of updates attempted (i.e. to do processing in batches). 
+The API call will only update the algorithm and checksum for a file if the existing checksum can be validated against the file. +Statistics concerning the updates are returned in the response to the API call with details in the log. +The primary use for this API call is to update existing files after the algorithm used when uploading new files is changed (see :ref:`:FileFixityChecksumAlgorithm`). +Allowed values are MD5, SHA-1, SHA-256, and SHA-512. + +.. code-block:: bash + + export ALG=SHA-256 + export BATCHSIZE=1 + + curl http://localhost:8080/api/admin/updateHashValues/$ALG + curl http://localhost:8080/api/admin/updateHashValues/$ALG?num=$BATCHSIZE + + .. _dataset-validation-api: Dataset Validation @@ -4580,6 +4680,7 @@ A curl example using allowing access to a dataset's metadata Please see :ref:`dataverse.api.signature-secret` for the configuration option to add a shared secret, enabling extra security. + .. _send-feedback: Send Feedback To Contact(s) @@ -4605,3 +4706,42 @@ A curl example using an ``ID`` curl -X POST -H 'Content-Type:application/json' -d "$JSON" $SERVER_URL/api/admin/feedback Note that this call could be useful in coordinating with dataset authors (assuming they are also contacts) as an alternative/addition to the functionality provided by :ref:`return-a-dataset`. + + +MyData +------ + +The MyData API is used to get a list of just the datasets, dataverses or datafiles an authenticated user can edit. + +A curl example listing objects + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ROLE_IDS=6 + export DVOBJECT_TYPES=Dataset + export PUBLISHED_STATES=Unpublished + export PER_PAGE=10 + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/mydata/retrieve?role_ids=$ROLE_IDS&dvobject_types=$DVOBJECT_TYPES&published_states=$PUBLISHED_STATES&per_page=$PER_PAGE" + +Parameters: + +``role_ids`` Roles are customizable. Standard roles include: + +- ``1`` = Admin +- ``2`` = File Downloader +- ``3`` = Dataverse + Dataset Creator +- ``4`` = Dataverse Creator +- ``5`` = Dataset Creator +- ``6`` = Contributor +- ``7`` = Curator +- ``8`` = Member + +``dvobject_types`` Type of object, several possible values among: ``DataFile``, ``Dataset`` & ``Dataverse``. + +``published_states`` State of the object, several possible values among: ``Published``, ``Unpublished``, ``Draft``, ``Deaccessioned`` & ``In+Review``. + +``per_page`` Number of results returned per page. + diff --git a/doc/sphinx-guides/source/container/dev-usage.rst b/doc/sphinx-guides/source/container/dev-usage.rst index 5b8253b53f7..3fbe55766d5 100644 --- a/doc/sphinx-guides/source/container/dev-usage.rst +++ b/doc/sphinx-guides/source/container/dev-usage.rst @@ -9,6 +9,8 @@ Please note! This Docker setup is not for production! Quickstart ---------- +First, install Java 11 and Maven. + After cloning the repo, try this: ``mvn -Pct clean package docker:run`` @@ -27,25 +29,157 @@ Assuming you have `Docker `_, `Docker D you have Java and Maven installed, as you are at least about to develop code changes. To test drive these local changes to the Dataverse codebase in a containerized application server (and avoid the -setup described in :doc:`../developers/dev-environment`), you must a) build the application container and b) -run it in addition to the necessary dependencies. 
- -Building and Running --------------------- +setup described in :doc:`../developers/dev-environment`), you must a) build the application and b) run it in addition +to the necessary dependencies. (Which might involve building a new local version of the :doc:`configbaker-image`.) -To build the application image, run the following command, as described in :doc:`app-image`: +.. _dev-build: -``mvn -Pct clean package`` +Building +-------- -Now, start all the containers with a single command: +To build the :doc:`application ` and :doc:`config baker image `, run the following command: -``mvn -Pct docker:run`` +``mvn -Pct clean package`` -(You could also concatenate both commands into one, as shown above in the quickstart.) +Once this is done, you will see images ``gdcc/dataverse:unstable`` and ``gdcc/configbaker:unstable`` available in your +Docker cache. + +**Note:** This will skip any unit tests. If you have built the code before for testing, etc. you might omit the +``clean`` to avoid recompiling. + +**Note:** Also we have a ``docker-compose-dev.yml`` file, it's currently not possible to build the images without +invoking Maven. This might change in the future. + + +.. _dev-run: + +Running +------- + +After building the app and config baker image containing your local changes to the Dataverse application, you want to +run it together with all dependencies. There are four ways to do this (commands executed at root of project directory): + +.. list-table:: Cheatsheet: Running Containers + :widths: 15 40 45 + :header-rows: 1 + :stub-columns: 1 + :align: left + + * - \ + - Using Maven + - Using Compose + * - In foreground + - ``mvn -Pct docker:run`` + - ``docker compose -f docker-compose-dev.yml up`` + * - In background + - ``mvn -Pct docker:start`` + - ``docker compose -f docker-compose-dev.yml up -d`` + +Both ways have their pros and cons: + +.. list-table:: Decision Helper: Fore- or Background? + :widths: 15 40 45 + :header-rows: 1 + :stub-columns: 1 + :align: left + + * - \ + - Pros + - Cons + * - Foreground + - | Logs scroll by when interacting with API / UI + | To stop all containers simply hit ``Ctrl+C`` + - | Lots and lots of logs scrolling by + | Must stop all containers to restart + * - Background + - | No logs scrolling by + | Easy to replace single containers + - | No logs scrolling by + | Stopping containers needs an extra command + +In case you want to concatenate building and running, here's a cheatsheet for you: + +.. list-table:: Cheatsheet: Building and Running Containers + :widths: 15 40 45 + :header-rows: 1 + :stub-columns: 1 + :align: left + + * - \ + - Using Maven + - Using Compose + * - In foreground + - ``mvn -Pct package docker:run`` + - ``mvn -Pct package && docker compose -f docker-compose-dev.yml up`` + * - In background + - ``mvn -Pct package docker:start`` + - ``mvn -Pct package && docker compose -f docker-compose-dev.yml up -d`` + +Once all containers have been started, you can check if the application was deployed correctly by checking the version +at http://localhost:8080/api/info/version or watch the logs. + +**Note:** To stop all containers you started in background, invoke ``mvn -Pct docker:stop`` or +``docker compose -f docker-compose-dev.yml down``. Check that you can log in to http://localhost:8080 using user ``dataverseAdmin`` and password ``admin1``. -You can also access the Payara Admin Console if needed, which is available at http://localhost:4848. To log in, use user ``admin`` and password ``admin``. 
As a reminder, the application container is for development use only, so we are exposing the admin console for testing purposes. In a production environment, it may be more convenient to leave this console unopened. +You can also access the Payara Admin Console if needed, which is available at http://localhost:4848. To log in, use +user ``admin`` and password ``admin``. As a reminder, the application container is for development use only, so we +are exposing the admin console for testing purposes. In a production environment, it may be more convenient to leave +this console unopened. Note that data is persisted in ``./docker-dev-volumes`` in the root of the Git repo. For a clean start, you should remove this directory before running the ``mvn`` commands above. + + +.. _dev-logs: + +Viewing Logs +------------ + +In case you started containers in background mode (see :ref:`dev-run`), you can use the following commands to view and/or +watch logs from the containers. + +The safe bet for any running container's logs is to lookup the container name via ``docker ps`` and use it in +``docker logs ``. You can tail logs by adding ``-n`` and follow them by adding ``-f`` (just like ``tail`` cmd). +See ``docker logs --help`` for more. + +Alternatives: + +- In case you used Maven for running, you may use ``mvn -Pct docker:logs -Ddocker.filter=``. +- If you used Docker Compose for running, you may use ``docker compose -f docker-compose-dev.yml logs ``. + Options are the same. + + +Re-Deploying +------------ + +Currently, the only safe and tested way to re-deploy the Dataverse application after you applied code changes is +by recreating the container(s). In the future, more options may be added here. + +If you started your containers in foreground, just stop them and follow the steps for building and running again. +The same goes for using Maven to start the containers in the background. + +In case of using Docker Compose and starting the containers in the background, you can use a workaround to only +restart the application container: + +.. code-block:: + + # First rebuild the container (will complain about an image still in use, this is fine.) + mvn -Pct package + # Then re-create the container (will automatically restart the container for you) + docker compose -f docker-compose-dev.yml create dev_dataverse + +Using ``docker container inspect dev_dataverse | grep Image`` you can verify the changed checksums. + +Using A Debugger +---------------- + +The :doc:`base-image` enables usage of the `Java Debugging Wire Protocol `_ +for remote debugging if you set ``ENABLE_JDWP=1`` as environment variable for the application container. +The default configuration when executing containers with the commands listed at :ref:`dev-run` already enables this. + +There are a lot of tutorials how to connect your IDE's debugger to a remote endpoint. Please use ``localhost:9009`` +as the endpoint. Here are links to the most common IDEs docs on remote debugging: +`Eclipse `_, +`IntelliJ `_ diff --git a/doc/sphinx-guides/source/developers/dev-environment.rst b/doc/sphinx-guides/source/developers/dev-environment.rst index 58b25d8b941..b3f7fb1c1af 100755 --- a/doc/sphinx-guides/source/developers/dev-environment.rst +++ b/doc/sphinx-guides/source/developers/dev-environment.rst @@ -7,12 +7,20 @@ These instructions are purposefully opinionated and terse to help you get your d .. 
contents:: |toctitle| :local: -Quick Start ------------ +Quick Start (Docker) +-------------------- -The quickest way to get the Dataverse Software running is to use Vagrant as described in the :doc:`tools` section, or use Docker containers as described the :doc:`../container/dev-usage` section of the Container Guide. +The quickest way to get Dataverse running is in Docker as explained in :doc:`../container/dev-usage` section of the Container Guide. -For day to day development work, we recommended the following setup. + +Classic Dev Environment +----------------------- + +Since before Docker existed, we have encouraged installing Dataverse and all its dependencies directly on your development machine, as described below. This can be thought of as the "classic" development environment for Dataverse. + +However, in 2023 we decided that we'd like to encourage all developers to start using Docker instead and opened https://github.com/IQSS/dataverse/issues/9616 to indicate that we plan to rewrite this page to recommend the use of Docker. + +There's nothing wrong with the classic instructions below and we don't plan to simply delete them. They are a valid alternative to running Dataverse in Docker. We will likely move them to another page. Set Up Dependencies ------------------- @@ -22,7 +30,7 @@ Supported Operating Systems Mac OS X or Linux is required because the setup scripts assume the presence of standard Unix utilities. -Windows is not well supported, unfortunately, but Vagrant and Minishift environments are described in the :doc:`windows` section. +Windows is gaining support through Docker as described in the :doc:`windows` section. Install Java ~~~~~~~~~~~~ @@ -87,12 +95,14 @@ To install Payara, run the following commands: ``cd /usr/local`` -``sudo curl -O -L https://s3-eu-west-1.amazonaws.com/payara.fish/Payara+Downloads/5.2022.3/payara-5.2022.3.zip`` +``sudo curl -O -L https://nexus.payara.fish/repository/payara-community/fish/payara/distributions/payara/5.2022.3/payara-5.2022.3.zip`` ``sudo unzip payara-5.2022.3.zip`` ``sudo chown -R $USER /usr/local/payara5`` +If nexus.payara.fish is ever down for maintenance, Payara distributions are also available from https://repo1.maven.org/maven2/fish/payara/distributions/payara/ + Install Service Dependencies Directly on localhost ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -184,6 +194,10 @@ If you want to remove the containers, then run: ``docker-compose -f docker-compose-dev.yml down`` +If you want to run a single container (the mail server, for example) then run: + +``docker-compose -f docker-compose-dev.yml up dev_smtp`` + For a fresh installation, and before running the Software Installer Script, it is recommended to delete the docker-dev-env folder to avoid installation problems due to existing data in the containers. Run the Dataverse Software Installer Script diff --git a/doc/sphinx-guides/source/developers/windows.rst b/doc/sphinx-guides/source/developers/windows.rst index e278b193e12..53578fe980c 100755 --- a/doc/sphinx-guides/source/developers/windows.rst +++ b/doc/sphinx-guides/source/developers/windows.rst @@ -2,86 +2,17 @@ Windows Development =================== -Development on Windows is not well supported, unfortunately. You will have a much easier time if you develop on Mac or Linux as described under :doc:`dev-environment` section. - -Vagrant commands appear below and were tested on Windows 10 but the Vagrant environment is currently broken. 
Please see https://github.com/IQSS/dataverse/issues/6849 - -There is a newer, experimental Docker option described under :doc:`/container/dev-usage` in the Container Guide. +Historically, development on Windows is `not well supported `_, but as of 2023 a container-based approach is recommended. .. contents:: |toctitle| :local: -Running the Dataverse Software in Vagrant ------------------------------------------ - -Install Vagrant -~~~~~~~~~~~~~~~ - -Download and install Vagrant from https://www.vagrantup.com - -Vagrant advises you to reboot but let's install VirtualBox first. - -Install VirtualBox -~~~~~~~~~~~~~~~~~~ - -Download and install VirtualBox from https://www.virtualbox.org - -Note that we saw an error saying "Oracle VM VirtualBox 5.2.8 Setup Wizard ended prematurely" but then we re-ran the installer and it seemed to work. - -Reboot -~~~~~~ - -Again, Vagrant asks you to reboot, so go ahead. - -Install Git -~~~~~~~~~~~ - -Download and install Git from https://git-scm.com - -Configure Git to use Unix Line Endings -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Launch Git Bash and run the following commands: - -``git config --global core.autocrlf input`` - -Pro tip: Use Shift-Insert to paste into Git Bash. - -See also https://help.github.com/articles/dealing-with-line-endings/ - -If you skip this step you are likely to see the following error when you run ``vagrant up``. - -``/tmp/vagrant-shell: ./install: /usr/bin/perl^M: bad interpreter: No such file or directory`` - -Clone Git Repo -~~~~~~~~~~~~~~ - -From Git Bash, run the following command: - -``git clone https://github.com/IQSS/dataverse.git`` - -vagrant up -~~~~~~~~~~ - -From Git Bash, run the following commands: - -``cd dataverse`` - -The ``dataverse`` directory you changed is the one you just cloned. Vagrant will operate on a file called ``Vagrantfile``. - -``vagrant up`` - -After a long while you hopefully will have a Dataverse installation available at http://localhost:8888 - -Improving Windows Support -------------------------- - -Windows Subsystem for Linux -~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Running Dataverse in Docker on Windows +-------------------------------------- -We have been unable to get Windows Subsystem for Linux (WSL) to work. We tried following the steps at https://docs.microsoft.com/en-us/windows/wsl/install-win10 but the "Get" button was greyed out when we went to download Ubuntu. +See the `post `_ by Akio Sone for additional details, but please observe the following: -Discussion and Feedback -~~~~~~~~~~~~~~~~~~~~~~~ +- In git, the line-ending setting should be set to always LF (line feed, ``core.autocrlf=input``) +- You must have jq installed: https://jqlang.github.io/jq/download/ -For more discussion of Windows support for Dataverse Software development see our community list thread `"Do you want to develop on Windows?" `_ We would be happy to incorporate feedback from Windows developers into this page. The :doc:`documentation` section describes how. +Once the above is all set, you can move on to :doc:`/container/dev-usage` in the Container Guide. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 99a0809471f..ca17fe09c15 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -2775,13 +2775,14 @@ timestamps. :FilePIDsEnabled ++++++++++++++++ -Toggles publishing of file-based PIDs for the entire installation. 
By default this setting is absent and Dataverse Software assumes it to be true. If enabled, the registration will be performed asynchronously (in the background) during publishing of a dataset. +Toggles publishing of file-level PIDs for the entire installation. By default this setting is absent and Dataverse Software assumes it to be true. If enabled, the registration will be performed asynchronously (in the background) during publishing of a dataset. If you don't want to register file-based PIDs for your installation, set: ``curl -X PUT -d 'false' http://localhost:8080/api/admin/settings/:FilePIDsEnabled`` -Note: File-level PID registration was added in Dataverse Software 4.9; it could not be disabled until Dataverse Software 4.9.3. + +It is possible to override the installation-wide setting for specific collections. For example, registration of PIDs for files can be enabled in a specific collection when it is disabled instance-wide. Or it can be disabled in specific collections where it is enabled by default. See :ref:`collection-attributes-api` for details. .. _:IndependentHandleService: @@ -3121,6 +3122,8 @@ This curl command... See also :doc:`oauth2`. +.. _:FileFixityChecksumAlgorithm: + :FileFixityChecksumAlgorithm ++++++++++++++++++++++++++++ @@ -3130,12 +3133,9 @@ The default checksum algorithm used is MD5 and should be sufficient for establis ``curl -X PUT -d 'SHA-512' http://localhost:8080/api/admin/settings/:FileFixityChecksumAlgorithm`` -The fixity algorithm used on existing files can be changed by a superuser using the API. An optional query parameter (num) can be used to limit the number of updates attempted. -The API call will only update the algorithm and checksum for a file if the existing checksum can be validated against the file. -Statistics concerning the updates are returned in the response to the API call with details in the log. +To update the algorithm used for existing files, see :ref:`UpdateChecksums` -``curl http://localhost:8080/api/admin/updateHashValues/{alg}`` -``curl http://localhost:8080/api/admin/updateHashValues/{alg}?num=1`` +The fixity checksum algorithm in use can be discovered via API. See :ref:`get-fixity-algorithm` in the API Guide. .. _:PVMinLength: @@ -3415,6 +3415,8 @@ Limit on how many guestbook entries to display on the guestbook-responses page. ``curl -X PUT -d 10000 http://localhost:8080/api/admin/settings/:GuestbookResponsesPageDisplayLimit`` +.. _:CustomDatasetSummaryFields: + :CustomDatasetSummaryFields +++++++++++++++++++++++++++ @@ -3424,6 +3426,10 @@ You can replace the default dataset metadata fields that are displayed above fil You have to put the datasetFieldType name attribute in the :CustomDatasetSummaryFields setting for this to work. +The default fields are ``dsDescription,subject,keyword,publication,notesText``. + +This setting can be retrieved via API. See :ref:`get-dataset-summary-field-names` in the API Guide. 
+ :AllowApiTokenLookupViaApi ++++++++++++++++++++++++++ diff --git a/doc/sphinx-guides/source/user/dataset-management.rst b/doc/sphinx-guides/source/user/dataset-management.rst index f530e825a19..648d81c1076 100755 --- a/doc/sphinx-guides/source/user/dataset-management.rst +++ b/doc/sphinx-guides/source/user/dataset-management.rst @@ -208,7 +208,7 @@ Previewers are available for the following file types: - Zip (preview and extract/download) - HTML - GeoJSON -- NetCDF/HDF5 (NcML format) +- NetCDF/HDF5 - Hypothes.is Additional file types will be added to the `dataverse-previewers `_ repo before they are listed above so please check there for the latest information or to request (or contribute!) an additional file previewer. @@ -348,6 +348,15 @@ A map will be shown as a preview of GeoJSON files when the previewer has been en NetCDF and HDF5 --------------- +H5Web Previewer +~~~~~~~~~~~~~~~ + +NetCDF and HDF5 files can be explored and visualized with H5Web_, which has been adapted into a file previewer tool (see :ref:`file-previews`) that can be enabled in your Dataverse installation. + +.. _H5Web: https://h5web.panosc.eu + +|h5web| + NcML ~~~~ @@ -801,6 +810,8 @@ If you deaccession the most recently published version of the dataset but not al :class: img-responsive .. |bagit-image1| image:: ./img/bagit-handler-errors.png :class: img-responsive +.. |h5web| image:: ./img/h5web.png + :class: img-responsive .. _Make Data Count: https://makedatacount.org .. _Crossref: https://crossref.org diff --git a/doc/sphinx-guides/source/user/dataverse-management.rst b/doc/sphinx-guides/source/user/dataverse-management.rst index ed90497da8c..b5e8d8f4fc9 100755 --- a/doc/sphinx-guides/source/user/dataverse-management.rst +++ b/doc/sphinx-guides/source/user/dataverse-management.rst @@ -216,7 +216,7 @@ In order to link a dataset, you will need your account to have the "Add Dataset" To link a dataset to your Dataverse collection, you must navigate to that dataset and click the white "Link" button in the upper-right corner of the dataset page. This will open up a window where you can type in the name of the Dataverse collection that you would like to link the dataset to. Select your Dataverse collection and click the save button. This will establish the link, and the dataset will now appear under your Dataverse collection. -There is currently no way to remove established links in the UI. If you need to remove a link between a Dataverse collection and a dataset, please contact the support team for the Dataverse installation you are using. +There is currently no way to remove established links in the UI. If you need to remove a link between a Dataverse collection and a dataset, please contact the support team for the Dataverse installation you are using (see the :ref:`unlink-a-dataset` section of the Admin Guide for more information). .. 
_dataverse-linking: diff --git a/doc/sphinx-guides/source/user/img/h5web.png b/doc/sphinx-guides/source/user/img/h5web.png new file mode 100644 index 00000000000..176aa775114 Binary files /dev/null and b/doc/sphinx-guides/source/user/img/h5web.png differ diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 06277d9541f..ab44dbc1806 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -12,6 +12,7 @@ services: - DATAVERSE_DB_HOST=postgres - DATAVERSE_DB_PASSWORD=secret - DATAVERSE_DB_USER=${DATAVERSE_DB_USER} + - ENABLE_JDWP=1 - DATAVERSE_FEATURE_API_BEARER_AUTH=1 ports: - "8080:8080" # HTTP (Dataverse Application) diff --git a/pom.xml b/pom.xml index 09e60fa5b67..96f598af0f5 100644 --- a/pom.xml +++ b/pom.xml @@ -184,6 +184,11 @@ provided + + fish.payara.api + payara-api + provided + com.sun.mail jakarta.mail diff --git a/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java index 479438f3f45..2a3f2d50364 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java @@ -113,18 +113,20 @@ public Map getMetadataForTargetURL(DvObject dvObject) { } @Override - public boolean alreadyExists(DvObject dvo) throws Exception { + public boolean alreadyRegistered(DvObject dvo) throws Exception { if(dvo==null) { - logger.severe("Null DvObject sent to alreadyExists()."); + logger.severe("Null DvObject sent to alreadyRegistered()."); return false; } GlobalId globalId = dvo.getGlobalId(); if(globalId == null) { return false; } - return alreadyExists(globalId); + return alreadyRegistered(globalId, false); } + public abstract boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) throws Exception; + /* * ToDo: the DvObject being sent in provides partial support for the case where * it has a different authority/protocol than what is configured (i.e. a legacy @@ -188,7 +190,7 @@ public boolean isGlobalIdUnique(GlobalId globalId) { // not in local DB, look in the persistent identifier service try { - return ! alreadyExists(globalId); + return ! 
alreadyRegistered(globalId, false); } catch (Exception e){ //we can live with failure - means identifier not found remotely } diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java index c5d4faa0569..fa0a745d80f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java @@ -44,23 +44,22 @@ public boolean registerWhenPublished() { @Override - public boolean alreadyExists(GlobalId pid) { - logger.log(Level.FINE,"alreadyExists"); + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) { + logger.log(Level.FINE,"alreadyRegistered"); if(pid==null || pid.asString().isEmpty()) { logger.fine("No identifier sent."); return false; } - boolean alreadyExists; + boolean alreadyRegistered; String identifier = pid.asString(); try{ - alreadyExists = doiDataCiteRegisterService.testDOIExists(identifier); + alreadyRegistered = doiDataCiteRegisterService.testDOIExists(identifier); } catch (Exception e){ - logger.log(Level.WARNING, "alreadyExists failed"); + logger.log(Level.WARNING, "alreadyRegistered failed"); return false; } - return alreadyExists; + return alreadyRegistered; } - @Override public String createIdentifier(DvObject dvObject) throws Exception { diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java index 5776aca8c8a..d9b0fde15da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java @@ -63,19 +63,10 @@ public boolean registerWhenPublished() { } @Override - public boolean alreadyExists(DvObject dvObject) throws Exception { - if(dvObject==null) { - logger.severe("Null DvObject sent to alreadyExists()."); - return false; - } - return alreadyExists(dvObject.getGlobalId()); - } - - @Override - public boolean alreadyExists(GlobalId pid) throws Exception { - logger.log(Level.FINE,"alreadyExists"); + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) throws Exception { + logger.log(Level.FINE,"alreadyRegistered"); try { - HashMap result = ezidService.getMetadata(pid.asString()); + HashMap result = ezidService.getMetadata(pid.asString()); return result != null && !result.isEmpty(); // TODO just check for HTTP status code 200/404, sadly the status code is swept under the carpet } catch (EZIDException e ){ @@ -87,7 +78,7 @@ public boolean alreadyExists(GlobalId pid) throws Exception { if (e.getLocalizedMessage().contains("no such identifier")){ return false; } - logger.log(Level.WARNING, "alreadyExists failed"); + logger.log(Level.WARNING, "alreadyRegistered failed"); logger.log(Level.WARNING, "getIdentifier(dvObject) {0}", pid.asString()); logger.log(Level.WARNING, "String {0}", e.toString()); logger.log(Level.WARNING, "localized message {0}", e.getLocalizedMessage()); diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java index 1a1a712088c..0182c745cd0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java @@ -75,5 +75,4 @@ public boolean isConfigured() { protected String getProviderKeyName() { return null; } - } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java 
b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 196f84b6877..c30bfce368a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1,7 +1,5 @@ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.authorization.AccessRequest; -import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -11,19 +9,15 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileSortFieldAndOrder; import edu.harvard.iq.dataverse.util.FileUtil; -import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.UUID; import java.util.logging.Level; import java.util.logging.Logger; @@ -36,9 +30,7 @@ import javax.persistence.NoResultException; import javax.persistence.PersistenceContext; import javax.persistence.Query; -import javax.persistence.StoredProcedureQuery; import javax.persistence.TypedQuery; -import org.apache.commons.lang3.RandomStringUtils; /** * @@ -199,6 +191,18 @@ public List findByDatasetId(Long studyId) { .setParameter("studyId", studyId).getResultList(); } + /** + * + * @param collectionId numeric id of the parent collection ("dataverse") + * @return list of files in the datasets that are *direct* children of the collection specified + * (i.e., no datafiles in sub-collections of this collection will be included) + */ + public List findByDirectCollectionOwner(Long collectionId) { + String queryString = "select f from DataFile f, Dataset d where f.owner.id = d.id and d.owner.id = :collectionId order by f.id"; + return em.createQuery(queryString, DataFile.class) + .setParameter("collectionId", collectionId).getResultList(); + } + public List findAllRelatedByRootDatafileId(Long datafileId) { /* Get all files with the same root datafile id @@ -561,364 +565,6 @@ public DataFile findCheapAndEasy(Long id) { return dataFile; } - /* - * This is an experimental method for populating the versions of - * the datafile with the filemetadatas, optimized for making as few db - * queries as possible. - * It should only be used to retrieve filemetadata for the DatasetPage! - * It is not guaranteed to adequately perform anywhere else. - */ - - public void findFileMetadataOptimizedExperimental(Dataset owner, DatasetVersion version, AuthenticatedUser au) { - List dataFiles = new ArrayList<>(); - List dataTables = new ArrayList<>(); - //List retList = new ArrayList<>(); - - // TODO: - // replace these maps with simple lists and run binary search on them. 
-- 4.2.1 - - Map userMap = new HashMap<>(); - Map filesMap = new HashMap<>(); - Map datatableMap = new HashMap<>(); - Map categoryMap = new HashMap<>(); - Map> fileTagMap = new HashMap<>(); - List accessRequestFileIds = new ArrayList(); - - List fileTagLabels = DataFileTag.listTags(); - - - int i = 0; - //Cache responses - Map embargoMap = new HashMap(); - - List dataTableResults = em.createNativeQuery("SELECT t0.ID, t0.DATAFILE_ID, t0.UNF, t0.CASEQUANTITY, t0.VARQUANTITY, t0.ORIGINALFILEFORMAT, t0.ORIGINALFILESIZE, t0.ORIGINALFILENAME FROM dataTable t0, dataFile t1, dvObject t2 WHERE ((t0.DATAFILE_ID = t1.ID) AND (t1.ID = t2.ID) AND (t2.OWNER_ID = " + owner.getId() + ")) ORDER BY t0.ID").getResultList(); - - for (Object[] result : dataTableResults) { - DataTable dataTable = new DataTable(); - long fileId = ((Number) result[1]).longValue(); - - dataTable.setId(((Number) result[1]).longValue()); - - dataTable.setUnf((String)result[2]); - - dataTable.setCaseQuantity((Long)result[3]); - - dataTable.setVarQuantity((Long)result[4]); - - dataTable.setOriginalFileFormat((String)result[5]); - - dataTable.setOriginalFileSize((Long)result[6]); - - dataTable.setOriginalFileName((String)result[7]); - - dataTables.add(dataTable); - datatableMap.put(fileId, i++); - - } - - logger.fine("Retrieved "+dataTables.size()+" DataTable objects."); - - List dataTagsResults = em.createNativeQuery("SELECT t0.DATAFILE_ID, t0.TYPE FROM DataFileTag t0, dvObject t1 WHERE (t1.ID = t0.DATAFILE_ID) AND (t1.OWNER_ID="+ owner.getId() + ")").getResultList(); - for (Object[] result : dataTagsResults) { - Long datafile_id = (Long) result[0]; - Integer tagtype_id = (Integer) result[1]; - if (fileTagMap.get(datafile_id) == null) { - fileTagMap.put(datafile_id, new HashSet<>()); - } - fileTagMap.get(datafile_id).add(tagtype_id); - } - logger.fine("Retrieved "+dataTagsResults.size()+" data tags."); - dataTagsResults = null; - - //Only need to check for access requests if there is an authenticated user - if (au != null) { - List accessRequests = em.createNativeQuery("SELECT t0.ID FROM DVOBJECT t0, FILEACCESSREQUESTS t1 WHERE t1.datafile_id = t0.id and t0.OWNER_ID = " + owner.getId() + " and t1.AUTHENTICATED_USER_ID = " + au.getId() + " ORDER BY t0.ID").getResultList(); - for (Object result : accessRequests) { - accessRequestFileIds.add(Long.valueOf((Integer)result)); - } - logger.fine("Retrieved " + accessRequests.size() + " access requests."); - accessRequests = null; - } - - i = 0; - - List fileResults = em.createNativeQuery("SELECT t0.ID, t0.CREATEDATE, t0.INDEXTIME, t0.MODIFICATIONTIME, t0.PERMISSIONINDEXTIME, t0.PERMISSIONMODIFICATIONTIME, t0.PUBLICATIONDATE, t0.CREATOR_ID, t0.RELEASEUSER_ID, t1.CONTENTTYPE, t0.STORAGEIDENTIFIER, t1.FILESIZE, t1.INGESTSTATUS, t1.CHECKSUMVALUE, t1.RESTRICTED, t1.CHECKSUMTYPE, t1.PREVIOUSDATAFILEID, t1.ROOTDATAFILEID, t0.PROTOCOL, t0.AUTHORITY, t0.IDENTIFIER, t1.EMBARGO_ID FROM DVOBJECT t0, DATAFILE t1 WHERE ((t0.OWNER_ID = " + owner.getId() + ") AND ((t1.ID = t0.ID) AND (t0.DTYPE = 'DataFile'))) ORDER BY t0.ID").getResultList(); - - for (Object[] result : fileResults) { - Integer file_id = (Integer) result[0]; - - DataFile dataFile = new DataFile(); - dataFile.setMergeable(false); - - dataFile.setId(file_id.longValue()); - - Timestamp createDate = (Timestamp) result[1]; - Timestamp indexTime = (Timestamp) result[2]; - Timestamp modificationTime = (Timestamp) result[3]; - Timestamp permissionIndexTime = (Timestamp) result[4]; - Timestamp permissionModificationTime = (Timestamp) result[5]; - 
Timestamp publicationDate = (Timestamp) result[6]; - - dataFile.setCreateDate(createDate); - dataFile.setIndexTime(indexTime); - dataFile.setModificationTime(modificationTime); - dataFile.setPermissionIndexTime(permissionIndexTime); - dataFile.setPermissionModificationTime(permissionModificationTime); - dataFile.setPublicationDate(publicationDate); - - Long creatorId = (Long) result[7]; - if (creatorId != null) { - AuthenticatedUser creator = userMap.get(creatorId); - if (creator == null) { - creator = userService.find(creatorId); - if (creator != null) { - userMap.put(creatorId, creator); - } - } - if (creator != null) { - dataFile.setCreator(creator); - } - } - - dataFile.setOwner(owner); - - Long releaseUserId = (Long) result[8]; - if (releaseUserId != null) { - AuthenticatedUser releaseUser = userMap.get(releaseUserId); - if (releaseUser == null) { - releaseUser = userService.find(releaseUserId); - if (releaseUser != null) { - userMap.put(releaseUserId, releaseUser); - } - } - if (releaseUser != null) { - dataFile.setReleaseUser(releaseUser); - } - } - - String contentType = (String) result[9]; - - if (contentType != null) { - dataFile.setContentType(contentType); - } - - String storageIdentifier = (String) result[10]; - - if (storageIdentifier != null) { - dataFile.setStorageIdentifier(storageIdentifier); - } - - Long fileSize = (Long) result[11]; - - if (fileSize != null) { - dataFile.setFilesize(fileSize); - } - - if (result[12] != null) { - String ingestStatusString = (String) result[12]; - dataFile.setIngestStatus(ingestStatusString.charAt(0)); - } - - String md5 = (String) result[13]; - - if (md5 != null) { - dataFile.setChecksumValue(md5); - } - - Boolean restricted = (Boolean) result[14]; - if (restricted != null) { - dataFile.setRestricted(restricted); - } - - String checksumType = (String) result[15]; - if (checksumType != null) { - try { - // In the database we store "SHA1" rather than "SHA-1". - DataFile.ChecksumType typeFromStringInDatabase = DataFile.ChecksumType.valueOf(checksumType); - dataFile.setChecksumType(typeFromStringInDatabase); - } catch (IllegalArgumentException ex) { - logger.info("Exception trying to convert " + checksumType + " to enum: " + ex); - } - } - - Long previousDataFileId = (Long) result[16]; - if (previousDataFileId != null) { - dataFile.setPreviousDataFileId(previousDataFileId); - } - - Long rootDataFileId = (Long) result[17]; - if (rootDataFileId != null) { - dataFile.setRootDataFileId(rootDataFileId); - } - - String protocol = (String) result[18]; - if (protocol != null) { - dataFile.setProtocol(protocol); - } - - String authority = (String) result[19]; - if (authority != null) { - dataFile.setAuthority(authority); - } - - String identifier = (String) result[20]; - if (identifier != null) { - dataFile.setIdentifier(identifier); - } - - Long embargo_id = (Long) result[21]; - if (embargo_id != null) { - if (embargoMap.containsKey(embargo_id)) { - dataFile.setEmbargo(embargoMap.get(embargo_id)); - } else { - Embargo e = embargoService.findByEmbargoId(embargo_id); - dataFile.setEmbargo(e); - embargoMap.put(embargo_id, e); - } - } - - // TODO: - // - if ingest status is "bad", look up the ingest report; - // - is it a dedicated thumbnail for the dataset? (do we ever need that info?? - not on the dataset page, I don't think...) - - // Is this a tabular file? 
- - if (datatableMap.get(dataFile.getId()) != null) { - dataTables.get(datatableMap.get(dataFile.getId())).setDataFile(dataFile); - dataFile.setDataTable(dataTables.get(datatableMap.get(dataFile.getId()))); - - } - - if (fileTagMap.get(dataFile.getId()) != null) { - for (Integer tag_id : fileTagMap.get(dataFile.getId())) { - DataFileTag tag = new DataFileTag(); - tag.setTypeByLabel(fileTagLabels.get(tag_id)); - tag.setDataFile(dataFile); - dataFile.addTag(tag); - } - } - - if (dataFile.isRestricted() && accessRequestFileIds.contains(dataFile.getId())) { - dataFile.addFileAccessRequester(au); - } - - dataFiles.add(dataFile); - filesMap.put(dataFile.getId(), i++); - } - - logger.fine("Retrieved and cached "+i+" datafiles."); - - i = 0; - for (DataFileCategory fileCategory : owner.getCategories()) { - //logger.fine("category: id="+fileCategory.getId()); - categoryMap.put(fileCategory.getId(), i++); - } - - logger.fine("Retrieved "+i+" file categories attached to the dataset."); - - version.setFileMetadatas(retrieveFileMetadataForVersion(owner, version, dataFiles, filesMap, categoryMap)); - logger.fine("Retrieved " + version.getFileMetadatas().size() + " filemetadatas for the version " + version.getId()); - owner.setFiles(dataFiles); - } - - private List retrieveFileMetadataForVersion(Dataset dataset, DatasetVersion version, List dataFiles, Map filesMap, Map categoryMap) { - List retList = new ArrayList<>(); - Map> categoryMetaMap = new HashMap<>(); - - List categoryResults = em.createNativeQuery("select t0.filecategories_id, t0.filemetadatas_id from filemetadata_datafilecategory t0, filemetadata t1 where (t0.filemetadatas_id = t1.id) AND (t1.datasetversion_id = "+version.getId()+")").getResultList(); - int i = 0; - for (Object[] result : categoryResults) { - Long category_id = (Long) result[0]; - Long filemeta_id = (Long) result[1]; - if (categoryMetaMap.get(filemeta_id) == null) { - categoryMetaMap.put(filemeta_id, new HashSet<>()); - } - categoryMetaMap.get(filemeta_id).add(category_id); - i++; - } - logger.fine("Retrieved and mapped "+i+" file categories attached to files in the version "+version.getId()); - - List metadataResults = em.createNativeQuery("select id, datafile_id, DESCRIPTION, LABEL, RESTRICTED, DIRECTORYLABEL, prov_freeform from FileMetadata where datasetversion_id = "+version.getId() + " ORDER BY LABEL").getResultList(); - - for (Object[] result : metadataResults) { - Integer filemeta_id = (Integer) result[0]; - - if (filemeta_id == null) { - continue; - } - - Long file_id = (Long) result[1]; - if (file_id == null) { - continue; - } - - Integer file_list_id = filesMap.get(file_id); - if (file_list_id == null) { - continue; - } - FileMetadata fileMetadata = new FileMetadata(); - fileMetadata.setId(filemeta_id.longValue()); - fileMetadata.setCategories(new LinkedList<>()); - - if (categoryMetaMap.get(fileMetadata.getId()) != null) { - for (Long cat_id : categoryMetaMap.get(fileMetadata.getId())) { - if (categoryMap.get(cat_id) != null) { - fileMetadata.getCategories().add(dataset.getCategories().get(categoryMap.get(cat_id))); - } - } - } - - fileMetadata.setDatasetVersion(version); - - // Link the FileMetadata object to the DataFile: - fileMetadata.setDataFile(dataFiles.get(file_list_id)); - // ... 
and the DataFile back to the FileMetadata: - fileMetadata.getDataFile().getFileMetadatas().add(fileMetadata); - - String description = (String) result[2]; - - if (description != null) { - fileMetadata.setDescription(description); - } - - String label = (String) result[3]; - - if (label != null) { - fileMetadata.setLabel(label); - } - - Boolean restricted = (Boolean) result[4]; - if (restricted != null) { - fileMetadata.setRestricted(restricted); - } - - String dirLabel = (String) result[5]; - if (dirLabel != null){ - fileMetadata.setDirectoryLabel(dirLabel); - } - - String provFreeForm = (String) result[6]; - if (provFreeForm != null){ - fileMetadata.setProvFreeForm(provFreeForm); - } - - retList.add(fileMetadata); - } - - logger.fine("Retrieved "+retList.size()+" file metadatas for version "+version.getId()+" (inside the retrieveFileMetadataForVersion method)."); - - - /* - We no longer perform this sort here, just to keep this filemetadata - list as identical as possible to when it's produced by the "traditional" - EJB method. When it's necessary to have the filemetadatas sorted by - FileMetadata.compareByLabel, the DatasetVersion.getFileMetadatasSorted() - method should be called. - - Collections.sort(retList, FileMetadata.compareByLabel); */ - - return retList; - } public List findIngestsInProgress() { if ( em.isOpen() ) { @@ -1438,38 +1084,6 @@ public List selectFilesWithMissingOriginalSizes() { * @param idServiceBean * @return {@code true} iff the global identifier is unique. */ -/* public boolean isGlobalIdUnique(String userIdentifier, DataFile datafile, GlobalIdServiceBean idServiceBean) { - String testProtocol = ""; - String testAuthority = ""; - if (datafile.getAuthority() != null){ - testAuthority = datafile.getAuthority(); - } else { - testAuthority = settingsService.getValueForKey(SettingsServiceBean.Key.Authority); - } - if (datafile.getProtocol() != null){ - testProtocol = datafile.getProtocol(); - } else { - testProtocol = settingsService.getValueForKey(SettingsServiceBean.Key.Protocol); - } - - boolean u = em.createNamedQuery("DvObject.findByProtocolIdentifierAuthority") - .setParameter("protocol", testProtocol) - .setParameter("authority", testAuthority) - .setParameter("identifier",userIdentifier) - .getResultList().isEmpty(); - - try{ - if (idServiceBean.alreadyExists(new GlobalId(testProtocol, testAuthority, userIdentifier))) { - u = false; - } - } catch (Exception e){ - //we can live with failure - means identifier not found remotely - } - - - return u; - } -*/ public void finalizeFileDelete(Long dataFileId, String storageLocation) throws IOException { // Verify that the DataFile no longer exists: if (find(dataFileId) != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 7823b753845..f9c839a0fff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -43,6 +43,10 @@ * @author skraffmiller */ @NamedQueries({ + // Dataset.findById should only be used if you're going to iterate over files (otherwise, lazy loading in DatasetService.find() is better). + // If you are going to iterate over files, preferably call the DatasetService.findDeep() method i.s.o. using this query directly. 
+ @NamedQuery(name = "Dataset.findById", + query = "SELECT o FROM Dataset o LEFT JOIN FETCH o.files WHERE o.id=:id"), @NamedQuery(name = "Dataset.findIdStale", query = "SELECT d.id FROM Dataset d WHERE d.indexTime is NULL OR d.indexTime < d.modificationTime"), @NamedQuery(name = "Dataset.findIdStalePermission", diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 3594d2375a3..5aed893ef8c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -40,6 +40,7 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.metadataimport.ForeignMetadataImportServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrlUtil; @@ -81,6 +82,8 @@ import java.util.Set; import java.util.Collection; import java.util.logging.Logger; +import java.util.stream.Collectors; + import javax.ejb.EJB; import javax.ejb.EJBException; import javax.faces.application.FacesMessage; @@ -233,6 +236,8 @@ public enum DisplayMode { ExternalToolServiceBean externalToolService; @EJB SolrClientService solrClientService; + @EJB + DvObjectServiceBean dvObjectService; @Inject DataverseRequestServiceBean dvRequestService; @Inject @@ -678,48 +683,45 @@ public void showAll(){ } private List selectFileMetadatasForDisplay() { - Set searchResultsIdSet = null; - - if (isIndexedVersion()) { + final Set searchResultsIdSet; + if (isIndexedVersion() && StringUtil.isEmpty(fileLabelSearchTerm) && StringUtil.isEmpty(fileTypeFacet) && StringUtil.isEmpty(fileAccessFacet) && StringUtil.isEmpty(fileTagsFacet)) { + // Indexed version: we need facets, they are set as a side effect of getFileIdsInVersionFromSolr method. + // But, no search terms were specified, we will return the full + // list of the files in the version: we discard the result from getFileIdsInVersionFromSolr. + getFileIdsInVersionFromSolr(workingVersion.getId(), this.fileLabelSearchTerm); + // Since the search results should include the full set of fmds if all the + // terms/facets are empty, setting them to null should just be + // an optimization to skip the loop below + searchResultsIdSet = null; + } else if (isIndexedVersion()) { // We run the search even if no search term and/or facets are // specified - to generate the facet labels list: searchResultsIdSet = getFileIdsInVersionFromSolr(workingVersion.getId(), this.fileLabelSearchTerm); - // But, if no search terms were specified, we return the full - // list of the files in the version: - if (StringUtil.isEmpty(fileLabelSearchTerm) - && StringUtil.isEmpty(fileTypeFacet) - && StringUtil.isEmpty(fileAccessFacet) - && StringUtil.isEmpty(fileTagsFacet)) { - // Since the search results should include the full set of fmds if all the - // terms/facets are empty, setting them to null should just be - // an optimization for the loop below - searchResultsIdSet = null; - } - } else { + } else if (!StringUtil.isEmpty(this.fileLabelSearchTerm)) { // No, this is not an indexed version. // If the search term was specified, we'll run a search in the db; // if not - return the full list of files in the version. // (no facets without solr!) 
- if (!StringUtil.isEmpty(this.fileLabelSearchTerm)) { - searchResultsIdSet = getFileIdsInVersionFromDb(workingVersion.getId(), this.fileLabelSearchTerm); - } + searchResultsIdSet = getFileIdsInVersionFromDb(workingVersion.getId(), this.fileLabelSearchTerm); + } else { + searchResultsIdSet = null; } - List retList = new ArrayList<>(); - - for (FileMetadata fileMetadata : workingVersion.getFileMetadatas()) { - if (searchResultsIdSet == null || searchResultsIdSet.contains(fileMetadata.getDataFile().getId())) { - retList.add(fileMetadata); - } + final List md = workingVersion.getFileMetadatas(); + final List retList; + if (searchResultsIdSet == null) { + retList = new ArrayList<>(md); + } else { + retList = md.stream().filter(x -> searchResultsIdSet.contains(x.getDataFile().getId())).collect(Collectors.toList()); } sortFileMetadatas(retList); return retList; } - private void sortFileMetadatas(List fileList) { + private void sortFileMetadatas(final List fileList) { - DataFileComparator dfc = new DataFileComparator(); - Comparator comp = dfc.compareBy(folderPresort, tagPresort, fileSortField, !"desc".equals(fileSortOrder)); + final DataFileComparator dfc = new DataFileComparator(); + final Comparator comp = dfc.compareBy(folderPresort, tagPresort, fileSortField, !"desc".equals(fileSortOrder)); Collections.sort(fileList, comp); } @@ -1843,6 +1845,17 @@ public boolean webloaderUploadSupported() { return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId()); } + private void setIdByPersistentId() { + GlobalId gid = PidUtil.parseAsGlobalID(persistentId); + Long id = dvObjectService.findIdByGlobalId(gid, DvObject.DType.Dataset); + if (id == null) { + id = dvObjectService.findIdByAltGlobalId(gid, DvObject.DType.Dataset); + } + if (id != null) { + this.setId(id); + } + } + private String init(boolean initFull) { //System.out.println("_YE_OLDE_QUERY_COUNTER_"); // for debug purposes @@ -1866,23 +1879,11 @@ private String init(boolean initFull) { // Set the workingVersion and Dataset // --------------------------------------- if (persistentId != null) { - logger.fine("initializing DatasetPage with persistent ID " + persistentId); - // Set Working Version and Dataset by PersistentID - dataset = datasetService.findByGlobalId(persistentId); - if (dataset == null) { - logger.warning("No such dataset: "+persistentId); - return permissionsWrapper.notFound(); - } - logger.fine("retrieved dataset, id="+dataset.getId()); - - retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); - //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByPersistentId(persistentId, version); - this.workingVersion = retrieveDatasetVersionResponse.getDatasetVersion(); - logger.fine("retrieved version: id: " + workingVersion.getId() + ", state: " + this.workingVersion.getVersionState()); - - } else if (this.getId() != null) { + setIdByPersistentId(); + } + if (this.getId() != null) { // Set Working Version and Dataset by Datasaet Id and Version - dataset = datasetService.find(this.getId()); + dataset = datasetService.findDeep(this.getId()); if (dataset == null) { logger.warning("No such dataset: "+dataset); return permissionsWrapper.notFound(); @@ -1978,11 +1979,6 @@ private String init(boolean initFull) { // init the list of FileMetadatas if (workingVersion.isDraft() && canUpdateDataset()) { readOnly = false; - } else { - // an attempt to retreive both the filemetadatas and datafiles early on, so 
that - // we don't have to do so later (possibly, many more times than necessary): - AuthenticatedUser au = session.getUser() instanceof AuthenticatedUser ? (AuthenticatedUser) session.getUser() : null; - datafileService.findFileMetadataOptimizedExperimental(dataset, workingVersion, au); } // This will default to all the files in the version, if the search term // parameter hasn't been specified yet: @@ -2849,15 +2845,19 @@ public String refresh() { DatasetVersionServiceBean.RetrieveDatasetVersionResponse retrieveDatasetVersionResponse = null; if (persistentId != null) { - //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByPersistentId(persistentId, version); - dataset = datasetService.findByGlobalId(persistentId); + setIdByPersistentId(); + if (this.getId() == null) { + logger.warning("No such dataset: "+persistentId); + return permissionsWrapper.notFound(); + } + dataset = datasetService.findDeep(this.getId()); + if (dataset == null) { + logger.warning("No such dataset: "+persistentId); + return permissionsWrapper.notFound(); + } retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); } else if (versionId != null) { retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByVersionId(versionId); - } else if (dataset.getId() != null) { - //retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionById(dataset.getId(), version); - dataset = datasetService.find(dataset.getId()); - retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version); } if (retrieveDatasetVersionResponse == null) { @@ -2882,11 +2882,6 @@ public String refresh() { this.dataset = this.workingVersion.getDataset(); } - if (readOnly) { - AuthenticatedUser au = session.getUser() instanceof AuthenticatedUser ? (AuthenticatedUser) session.getUser() : null; - datafileService.findFileMetadataOptimizedExperimental(dataset, workingVersion, au); - } - fileMetadatasSearch = selectFileMetadatasForDisplay(); displayCitation = dataset.getCitation(true, workingVersion); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 0f7599f6ae1..305afd2ed30 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -38,6 +38,7 @@ import javax.ejb.TransactionAttributeType; import javax.inject.Named; import javax.persistence.EntityManager; +import javax.persistence.LockModeType; import javax.persistence.NoResultException; import javax.persistence.PersistenceContext; import javax.persistence.Query; @@ -105,6 +106,38 @@ public Dataset find(Object pk) { return em.find(Dataset.class, pk); } + /** + * Retrieve a dataset with the deep underlying structure in one query execution. + * This is a more optimal choice when accessing files of a dataset. + * In a contrast, the find() method does not pre-fetch the file objects and results in point queries when accessing these objects. + * Since the files have a deep structure, many queries can be prevented by using the findDeep() method, especially for large datasets + * containing many files, and when iterating over all the files. + * When you are not going to access the file objects, the default find() method is better because of the lazy loading. 
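     * For example, a minimal usage sketch (illustrative only; "datasetService" stands for an
     * injected DatasetServiceBean and "datasetId" for the id of an existing dataset):
     * <pre>{@code
     * // Going to iterate over the files: one query pre-fetches the whole file graph.
     * Dataset ds = datasetService.findDeep(datasetId);
     * for (DataFile df : ds.getFiles()) {
     *     int metadataVersions = df.getFileMetadatas().size(); // no per-file point queries
     * }
     * // Not going to touch the files: plain find() keeps them lazily loaded.
     * Dataset light = datasetService.find(datasetId);
     * }</pre>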
+ * @return a dataset with pre-fetched file objects + */ + public Dataset findDeep(Object pk) { + return (Dataset) em.createNamedQuery("Dataset.findById") + .setParameter("id", pk) + // Optimization hints: retrieve all data in one query; this prevents point queries when iterating over the files + .setHint("eclipselink.left-join-fetch", "o.files.ingestRequest") + .setHint("eclipselink.left-join-fetch", "o.files.thumbnailForDataset") + .setHint("eclipselink.left-join-fetch", "o.files.dataTables") + .setHint("eclipselink.left-join-fetch", "o.files.auxiliaryFiles") + .setHint("eclipselink.left-join-fetch", "o.files.ingestReports") + .setHint("eclipselink.left-join-fetch", "o.files.dataFileTags") + .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas") + .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas.fileCategories") + .setHint("eclipselink.left-join-fetch", "o.files.guestbookResponses") + .setHint("eclipselink.left-join-fetch", "o.files.embargo") + .setHint("eclipselink.left-join-fetch", "o.files.fileAccessRequests") + .setHint("eclipselink.left-join-fetch", "o.files.owner") + .setHint("eclipselink.left-join-fetch", "o.files.releaseUser") + .setHint("eclipselink.left-join-fetch", "o.files.creator") + .setHint("eclipselink.left-join-fetch", "o.files.alternativePersistentIndentifiers") + .setHint("eclipselink.left-join-fetch", "o.files.roleAssignments") + .getSingleResult(); + } + public List findByOwnerId(Long ownerId) { return findByOwnerId(ownerId, false); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index 439e4b17ed4..9f272ec6877 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -1118,13 +1118,7 @@ public JsonObjectBuilder fixMissingUnf(String datasetVersionId, boolean forceRec // reindexing the dataset, to make sure the new UNF is in SOLR: boolean doNormalSolrDocCleanUp = true; - try { - Future indexingResult = indexService.indexDataset(datasetVersion.getDataset(), doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post UNF update indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + datasetVersion.getDataset().getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, datasetVersion.getDataset()); - } + indexService.asyncIndexDataset(datasetVersion.getDataset(), doNormalSolrDocCleanUp); return info; } diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index bc8716b6129..50d5ae09548 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -590,8 +590,34 @@ public void setCitationDatasetFieldTypes(List citationDatasetF this.citationDatasetFieldTypes = citationDatasetFieldTypes; } - + /** + * @Note: this setting is Nullable, with {@code null} indicating that the + * desired behavior is not explicitly configured for this specific collection. + * See the comment below. + */ + @Column(nullable = true) + private Boolean filePIDsEnabled; + /** + * Specifies whether the PIDs for Datafiles should be registered when publishing + * datasets in this Collection, if the behavior is explicitly configured. 
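     * A rough sketch of the resolution this tri-state value implies (illustrative only; the
     * authoritative check used elsewhere in this change is
     * SystemConfig.isFilePIDsEnabledForCollection(collection)):
     * <pre>{@code
     * Boolean effective = collection.getFilePIDsEnabled();
     * Dataverse ancestor = collection.getOwner();
     * while (effective == null && ancestor != null) { // walk up until an explicit setting is found
     *     effective = ancestor.getFilePIDsEnabled();
     *     ancestor = ancestor.getOwner();
     * }
     * // if still null, fall back to the instance-wide :FilePIDsEnabled setting
     * }</pre>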
+ * @return {@code Boolean.TRUE} if explicitly enabled, {@code Boolean.FALSE} if explicitly disabled. + * {@code null} indicates that the behavior is not explicitly defined, in which + * case the behavior should follow the explicit configuration of the first + * direct ancestor collection, or the instance-wide configuration, if none + * present. + * @Note: If present, this configuration therefore by default applies to all + * the sub-collections, unless explicitly overwritten there. + * @author landreev + */ + public Boolean getFilePIDsEnabled() { + return filePIDsEnabled; + } + + public void setFilePIDsEnabled(boolean filePIDsEnabled) { + this.filePIDsEnabled = filePIDsEnabled; + } + public List getDataverseFacets() { return getDataverseFacets(false); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseRoleServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseRoleServiceBean.java index 9d09d0580e2..b83593f5b6e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseRoleServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseRoleServiceBean.java @@ -303,7 +303,7 @@ public Set availableRoles(Long dvId) { Set roles = dv.getRoles(); roles.addAll(findBuiltinRoles()); - while (!dv.isEffectivelyPermissionRoot()) { + while (dv.getOwner() != null) { dv = dv.getOwner(); roles.addAll(dv.getRoles()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 854888737ee..e3013b8cf51 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -30,9 +30,13 @@ query="SELECT COUNT(obj) FROM DvObject obj WHERE obj.owner.id=:id"), @NamedQuery(name = "DvObject.findByGlobalId", query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"), + @NamedQuery(name = "DvObject.findIdByGlobalId", + query = "SELECT o.id FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"), @NamedQuery(name = "DvObject.findByAlternativeGlobalId", query = "SELECT o FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"), + @NamedQuery(name = "DvObject.findIdByAlternativeGlobalId", + query = "SELECT o.id FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"), @NamedQuery(name = "DvObject.findByProtocolIdentifierAuthority", query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol"), diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index e22e2f188fd..c9127af7c2b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -116,6 +116,16 @@ public DvObject findByAltGlobalId(GlobalId globalId, DvObject.DType dtype) { return runFindByGlobalId(query, globalId, dtype); } + public Long findIdByGlobalId(GlobalId globalId, DvObject.DType dtype) { + Query query = em.createNamedQuery("DvObject.findIdByGlobalId"); + return runFindIdByGlobalId(query, globalId, dtype); + } + + public Long findIdByAltGlobalId(GlobalId globalId, DvObject.DType dtype) { 
+ Query query = em.createNamedQuery("DvObject.findIdByAlternativeGlobalId"); + return runFindIdByGlobalId(query, globalId, dtype); + } + private DvObject runFindByGlobalId(Query query, GlobalId gid, DvObject.DType dtype) { DvObject foundDvObject = null; try { @@ -136,12 +146,37 @@ private DvObject runFindByGlobalId(Query query, GlobalId gid, DvObject.DType dty } return foundDvObject; } + + private Long runFindIdByGlobalId(Query query, GlobalId gid, DvObject.DType dtype) { + Long foundDvObject = null; + try { + query.setParameter("identifier", gid.getIdentifier()); + query.setParameter("protocol", gid.getProtocol()); + query.setParameter("authority", gid.getAuthority()); + query.setParameter("dtype", dtype.getDType()); + foundDvObject = (Long) query.getSingleResult(); + } catch (javax.persistence.NoResultException e) { + // (set to .info, this can fill the log file with thousands of + // these messages during a large harvest run) + logger.fine("no dvObject found: " + gid.asString()); + // DO nothing, just return null. + return null; + } catch (Exception ex) { + logger.info("Exception caught in findByGlobalId: " + ex.getLocalizedMessage()); + return null; + } + return foundDvObject; + } public DvObject findByGlobalId(GlobalId globalId) { - return (DvObject) em.createNamedQuery("DvObject.findByProtocolIdentifierAuthority") - .setParameter("identifier", globalId.getIdentifier()) - .setParameter("authority", globalId.getAuthority()) - .setParameter("protocol", globalId.getProtocol()).getSingleResult(); + try { + return (DvObject) em.createNamedQuery("DvObject.findByProtocolIdentifierAuthority") + .setParameter("identifier", globalId.getIdentifier()) + .setParameter("authority", globalId.getAuthority()).setParameter("protocol", globalId.getProtocol()) + .getSingleResult(); + } catch (NoResultException nre) { + return null; + } } public boolean isGlobalIdLocallyUnique(GlobalId globalId) { diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java index 4ff3d6dc9ac..aebf13778c3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java @@ -15,10 +15,25 @@ public interface GlobalIdServiceBean { static final Logger logger = Logger.getLogger(GlobalIdServiceBean.class.getCanonicalName()); - boolean alreadyExists(DvObject dvo) throws Exception; + boolean alreadyRegistered(DvObject dvo) throws Exception; + + /** + * This call reports whether a PID is registered with the external Provider + * service. For providers like DOIs/Handles with an external service, this call + * should accurately report whether the PID has been registered in the service. + * For providers with no external service, the call should return true if the + * PID is defined locally. If it isn't, these no-service providers need to know + * whether use case of the caller requires that the returned value should + * default to true or false - via the noProviderDefault parameter. + * + * @param globalId + * @param noProviderDefault - when there is no external service, and no local + * use of the PID, this should be returned + * @return whether the PID should be considered registered or not. 
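     * For example (an illustrative caller sketch; "pidProvider" stands for whichever
     * GlobalIdServiceBean is configured, and "candidatePid" for a hypothetical GlobalId):
     * <pre>{@code
     * // Import case: with no external registry to consult, treat the PID as already registered.
     * boolean registered = pidProvider.alreadyRegistered(dataset.getGlobalId(), true);
     * // Pre-registration check: with no external registry, assume the candidate PID is still free.
     * boolean taken = pidProvider.alreadyRegistered(candidatePid, false);
     * }</pre>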
+ * @throws Exception + */ + boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) throws Exception; - boolean alreadyExists(GlobalId globalId) throws Exception; - boolean registerWhenPublished(); boolean canManagePID(); boolean isConfigured(); diff --git a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java index 9ac4d5e29ae..d2149a3072a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java @@ -314,13 +314,13 @@ private String getAuthenticationHandle(String handlePrefix) { } @Override - public boolean alreadyExists(DvObject dvObject) throws Exception { + public boolean alreadyRegistered(DvObject dvObject) throws Exception { String handle = getDvObjectHandle(dvObject); return isHandleRegistered(handle); } @Override - public boolean alreadyExists(GlobalId pid) throws Exception { + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) throws Exception { String handle = pid.getAuthority() + "/" + pid.getIdentifier(); return isHandleRegistered(handle); } diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index 8f7f53de1a2..8c0a0bf90b0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -750,6 +750,14 @@ else if (dataset.isLockedFor(DatasetLock.Reason.InReview)) { } } } + + public void checkUpdateDatasetVersionLock(Dataset dataset, DataverseRequest dataverseRequest, Command command) throws IllegalCommandException { + boolean hasAtLeastOneLockThatIsNotAnIngestLock = dataset.isLocked() && dataset.getLocks().stream() + .anyMatch(lock -> !DatasetLock.Reason.Ingest.equals(lock.getReason())); + if (hasAtLeastOneLockThatIsNotAnIngestLock) { + checkEditDatasetLock(dataset, dataverseRequest, command); + } + } public void checkPublishDatasetLock(Dataset dataset, DataverseRequest dataverseRequest, Command command) throws IllegalCommandException { if (dataset.isLocked()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index d219339add9..b11a78c2416 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1376,7 +1376,7 @@ public Response fixMissingOriginalTypes() { "All the tabular files in the database already have the original types set correctly; exiting."); } else { for (Long fileid : affectedFileIds) { - logger.info("found file id: " + fileid); + logger.fine("found file id: " + fileid); } info.add("message", "Found " + affectedFileIds.size() + " tabular files with missing original types. Kicking off an async job that will repair the files in the background."); @@ -1566,6 +1566,12 @@ public Response registerDataFileAll(@Context ContainerRequestContext crc) { } catch (Exception e) { logger.info("Unexpected Exception: " + e.getMessage()); } + + try { + Thread.sleep(1000); + } catch (InterruptedException ie) { + logger.warning("Interrupted Exception when attempting to execute Thread.sleep()!"); + } } logger.info("Final Results:"); logger.info(alreadyRegistered + " of " + count + " files were already registered. 
" + new Date()); @@ -1577,6 +1583,88 @@ public Response registerDataFileAll(@Context ContainerRequestContext crc) { return ok("Datafile registration complete." + successes + " of " + released + " unregistered, published files registered successfully."); } + + @GET + @AuthRequired + @Path("/registerDataFiles/{alias}") + public Response registerDataFilesInCollection(@Context ContainerRequestContext crc, @PathParam("alias") String alias, @QueryParam("sleep") Integer sleepInterval) { + Dataverse collection; + try { + collection = findDataverseOrDie(alias); + } catch (WrappedResponse r) { + return r.getResponse(); + } + + AuthenticatedUser superuser = authSvc.getAdminUser(); + if (superuser == null) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "Cannot find the superuser to execute /admin/registerDataFiles."); + } + + if (!systemConfig.isFilePIDsEnabledForCollection(collection)) { + return ok("Registration of file-level pid is disabled in collection "+alias+"; nothing to do"); + } + + List dataFiles = fileService.findByDirectCollectionOwner(collection.getId()); + Integer count = dataFiles.size(); + Integer countSuccesses = 0; + Integer countAlreadyRegistered = 0; + Integer countReleased = 0; + Integer countDrafts = 0; + + if (sleepInterval == null) { + sleepInterval = 1; + } else if (sleepInterval.intValue() < 1) { + return error(Response.Status.BAD_REQUEST, "Invalid sleep interval: "+sleepInterval); + } + + logger.info("Starting to register: analyzing " + count + " files. " + new Date()); + logger.info("Only unregistered, published files will be registered."); + + + + for (DataFile df : dataFiles) { + try { + if ((df.getIdentifier() == null || df.getIdentifier().isEmpty())) { + if (df.isReleased()) { + countReleased++; + DataverseRequest r = createDataverseRequest(superuser); + execCommand(new RegisterDvObjectCommand(r, df)); + countSuccesses++; + if (countSuccesses % 100 == 0) { + logger.info(countSuccesses + " out of " + count + " files registered successfully. " + new Date()); + } + } else { + countDrafts++; + logger.fine(countDrafts + " out of " + count + " files not yet published"); + } + } else { + countAlreadyRegistered++; + logger.fine(countAlreadyRegistered + " out of " + count + " files are already registered. " + new Date()); + } + } catch (WrappedResponse ex) { + countReleased++; + logger.info("Failed to register file id: " + df.getId()); + Logger.getLogger(Datasets.class.getName()).log(Level.SEVERE, null, ex); + } catch (Exception e) { + logger.info("Unexpected Exception: " + e.getMessage()); + } + + try { + Thread.sleep(sleepInterval * 1000); + } catch (InterruptedException ie) { + logger.warning("Interrupted Exception when attempting to execute Thread.sleep()!"); + } + } + + logger.info(countAlreadyRegistered + " out of " + count + " files were already registered. " + new Date()); + logger.info(countDrafts + " out of " + count + " files are not yet published. " + new Date()); + logger.info(countReleased + " out of " + count + " unregistered, published files to register. " + new Date()); + logger.info(countSuccesses + " out of " + countReleased + " unregistered, published files registered successfully. " + + new Date()); + + return ok("Datafile registration complete. 
" + countSuccesses + " out of " + countReleased + + " unregistered, published files registered successfully."); + } @GET @AuthRequired diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index d40bc153141..8c1390b597e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -10,6 +10,7 @@ import edu.harvard.iq.dataverse.authorization.RoleAssignee; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; @@ -82,6 +83,7 @@ import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; import edu.harvard.iq.dataverse.metrics.MetricsUtil; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountUtil; +import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; import edu.harvard.iq.dataverse.util.ArchiverUtil; @@ -236,6 +238,9 @@ public class Datasets extends AbstractApiBean { @EJB DatasetVersionServiceBean datasetversionService; + @Inject + PrivateUrlServiceBean privateUrlService; + /** * Used to consolidate the way we parse and handle dataset versions. * @param @@ -2764,14 +2769,7 @@ public Response deleteLocks(@Context ContainerRequestContext crc, @PathParam("id } // kick of dataset reindexing, in case the locks removed // affected the search card: - try { - indexService.indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post lock removal indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); - - } + indexService.asyncIndexDataset(dataset, true); return ok("locks removed"); } return ok("dataset not locked"); @@ -2784,14 +2782,7 @@ public Response deleteLocks(@Context ContainerRequestContext crc, @PathParam("id dataset = findDatasetOrDie(id); // ... and kick of dataset reindexing, in case the lock removed // affected the search card: - try { - indexService.indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post lock removal indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); - - } + indexService.asyncIndexDataset(dataset, true); return ok("lock type " + lock.getReason() + " removed"); } return ok("no lock type " + lockType + " on the dataset"); @@ -2823,14 +2814,7 @@ public Response lockDataset(@Context ContainerRequestContext crc, @PathParam("id // refresh the dataset: dataset = findDatasetOrDie(id); // ... 
and kick of dataset reindexing: - try { - indexService.indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post add lock indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); - - } + indexService.asyncIndexDataset(dataset, true); return ok("dataset locked with lock type " + lockType); } catch (WrappedResponse wr) { @@ -3849,4 +3833,62 @@ public Response getExternalToolDVParams(@Context ContainerRequestContext crc, return wr.getResponse(); } } + + @GET + @Path("summaryFieldNames") + public Response getDatasetSummaryFieldNames() { + String customFieldNames = settingsService.getValueForKey(SettingsServiceBean.Key.CustomDatasetSummaryFields); + String[] fieldNames = DatasetUtil.getDatasetSummaryFieldNames(customFieldNames); + JsonArrayBuilder fieldNamesArrayBuilder = Json.createArrayBuilder(); + for (String fieldName : fieldNames) { + fieldNamesArrayBuilder.add(fieldName); + } + return ok(fieldNamesArrayBuilder); + } + + @GET + @Path("privateUrlDatasetVersion/{privateUrlToken}") + public Response getPrivateUrlDatasetVersion(@PathParam("privateUrlToken") String privateUrlToken) { + PrivateUrlUser privateUrlUser = privateUrlService.getPrivateUrlUserFromToken(privateUrlToken); + if (privateUrlUser == null) { + return notFound("Private URL user not found"); + } + boolean isAnonymizedAccess = privateUrlUser.hasAnonymizedAccess(); + String anonymizedFieldTypeNames = settingsSvc.getValueForKey(SettingsServiceBean.Key.AnonymizedFieldTypeNames); + if(isAnonymizedAccess && anonymizedFieldTypeNames == null) { + throw new NotAcceptableException("Anonymized Access not enabled"); + } + DatasetVersion dsv = privateUrlService.getDraftDatasetVersionFromToken(privateUrlToken); + if (dsv == null || dsv.getId() == null) { + return notFound("Dataset version not found"); + } + JsonObjectBuilder responseJson; + if (isAnonymizedAccess) { + List anonymizedFieldTypeNamesList = new ArrayList<>(Arrays.asList(anonymizedFieldTypeNames.split(",\\s"))); + responseJson = json(dsv, anonymizedFieldTypeNamesList); + } else { + responseJson = json(dsv); + } + return ok(responseJson); + } + + @GET + @Path("privateUrlDatasetVersion/{privateUrlToken}/citation") + public Response getPrivateUrlDatasetVersionCitation(@PathParam("privateUrlToken") String privateUrlToken) { + PrivateUrlUser privateUrlUser = privateUrlService.getPrivateUrlUserFromToken(privateUrlToken); + if (privateUrlUser == null) { + return notFound("Private URL user not found"); + } + DatasetVersion dsv = privateUrlService.getDraftDatasetVersionFromToken(privateUrlToken); + return (dsv == null || dsv.getId() == null) ? 
notFound("Dataset version not found") + : ok(dsv.getCitation(true, privateUrlUser.hasAnonymizedAccess())); + } + + @GET + @AuthRequired + @Path("{id}/versions/{versionId}/citation") + public Response getDatasetVersionCitation(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + return response(req -> ok( + getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getCitation(true, false)), getRequestUser(crc)); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index b57fe1dcd5d..bdab2818fbc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -82,6 +82,7 @@ import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.brief; import java.io.StringReader; import java.util.Collections; @@ -129,6 +130,7 @@ import java.util.Optional; import java.util.stream.Collectors; import javax.servlet.http.HttpServletResponse; +import javax.validation.constraints.NotNull; import javax.ws.rs.WebApplicationException; import javax.ws.rs.core.Context; import javax.ws.rs.core.StreamingOutput; @@ -166,7 +168,7 @@ public class Dataverses extends AbstractApiBean { @EJB SwordServiceBean swordService; - + @POST @AuthRequired public Response addRoot(@Context ContainerRequestContext crc, String body) { @@ -590,6 +592,69 @@ public Response deleteDataverse(@Context ContainerRequestContext crc, @PathParam }, getRequestUser(crc)); } + /** + * Endpoint to change attributes of a Dataverse collection. + * + * @apiNote Example curl command: + * curl -X PUT -d "test" http://localhost:8080/api/dataverses/$ALIAS/attribute/alias + * to change the alias of the collection named $ALIAS to "test". + */ + @PUT + @AuthRequired + @Path("{identifier}/attribute/{attribute}") + public Response updateAttribute(@Context ContainerRequestContext crc, @PathParam("identifier") String identifier, + @PathParam("attribute") String attribute, @QueryParam("value") String value) { + try { + Dataverse collection = findDataverseOrDie(identifier); + User user = getRequestUser(crc); + DataverseRequest dvRequest = createDataverseRequest(user); + + // TODO: The cases below use hard coded strings, because we have no place for definitions of those! + // They are taken from util.json.JsonParser / util.json.JsonPrinter. This shall be changed. + // This also should be extended to more attributes, like the type, theme, contacts, some booleans, etc. 
+ switch (attribute) { + case "alias": + collection.setAlias(value); + break; + case "name": + collection.setName(value); + break; + case "description": + collection.setDescription(value); + break; + case "affiliation": + collection.setAffiliation(value); + break; + /* commenting out the code from the draft pr #9462: + case "versionPidsConduct": + CollectionConduct conduct = CollectionConduct.findBy(value); + if (conduct == null) { + return badRequest("'" + value + "' is not one of [" + + String.join(",", CollectionConduct.asList()) + "]"); + } + collection.setDatasetVersionPidConduct(conduct); + break; + */ + case "filePIDsEnabled": + collection.setFilePIDsEnabled(parseBooleanOrDie(value)); + break; + default: + return badRequest("'" + attribute + "' is not a supported attribute"); + } + + // Off to persistence layer + execCommand(new UpdateDataverseCommand(collection, null, null, dvRequest, null)); + + // Also return modified collection to user + return ok("Update successful", JsonPrinter.json(collection)); + + // TODO: This is an anti-pattern, necessary due to this bean being an EJB, causing very noisy and unnecessary + // logging by the EJB container for bubbling exceptions. (It would be handled by the error handlers.) + } catch (WrappedResponse e) { + return e.getResponse(); + } + } + @DELETE @AuthRequired @Path("{linkingDataverseId}/deleteLink/{linkedDataverseId}") diff --git a/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java b/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java index 513f27c9e3d..3960fe4e996 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java @@ -247,11 +247,7 @@ private boolean createNewDraftVersion(ArrayList neededToUpdate } boolean doNormalSolrDocCleanUp = true; - try { - Future indexDatasetFuture = indexService.indexDataset(dataset, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException ex) { - logger.log(Level.SEVERE, "Couldn''t index dataset: " + ex.getMessage()); - } + indexService.asyncIndexDataset(dataset, doNormalSolrDocCleanUp); return true; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index 4b2e7a37b98..f6eda085c95 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -704,7 +704,7 @@ public Response reingest(@Context ContainerRequestContext crc, @PathParam("id") dataFile = fileService.save(dataFile); // queue the data ingest job for asynchronous execution: - String status = ingestService.startIngestJobs(new ArrayList<>(Arrays.asList(dataFile)), u); + String status = ingestService.startIngestJobs(dataset.getId(), new ArrayList<>(Arrays.asList(dataFile)), u); if (!StringUtil.isEmpty(status)) { // This most likely indicates some sort of a problem (for example, @@ -812,4 +812,10 @@ public Response getExternalToolFMParams(@Context ContainerRequestContext crc, @P eth = new ExternalToolHandler(externalTool, target.getDataFile(), apiToken, target, locale); return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); } + + @GET + @Path("fixityAlgorithm") + public Response getFixityAlgorithm() { + return ok(systemConfig.getFileFixityChecksumAlgorithm().toString()); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Index.java b/src/main/java/edu/harvard/iq/dataverse/api/Index.java index 728d86addcb..1361de8fbf7 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/api/Index.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Index.java @@ -243,12 +243,7 @@ public Response indexTypeById(@PathParam("type") String type, @PathParam("id") L Dataset dataset = datasetService.find(id); if (dataset != null) { boolean doNormalSolrDocCleanUp = true; - try { - Future indexDatasetFuture = indexService.indexDataset(dataset, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - // - return error(Status.BAD_REQUEST, writeFailureToLog(e.getLocalizedMessage(), dataset)); - } + indexService.asyncIndexDataset(dataset, doNormalSolrDocCleanUp); return ok("starting reindex of dataset " + id); } else { @@ -266,11 +261,7 @@ public Response indexTypeById(@PathParam("type") String type, @PathParam("id") L * @todo How can we display the result to the user? */ boolean doNormalSolrDocCleanUp = true; - try { - Future indexDatasetFuture = indexService.indexDataset(datasetThatOwnsTheFile, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - writeFailureToLog(e.getLocalizedMessage(), datasetThatOwnsTheFile); - } + indexService.asyncIndexDataset(datasetThatOwnsTheFile, doNormalSolrDocCleanUp); return ok("started reindexing " + type + "/" + id); } else { @@ -318,11 +309,7 @@ public Response indexDatasetByPersistentId(@QueryParam("persistentId") String pe } if (dataset != null) { boolean doNormalSolrDocCleanUp = true; - try { - Future indexDatasetFuture = indexService.indexDataset(dataset, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - writeFailureToLog(e.getLocalizedMessage(), dataset); - } + indexService.asyncIndexDataset(dataset, doNormalSolrDocCleanUp); JsonObjectBuilder data = Json.createObjectBuilder(); data.add("message", "Reindexed dataset " + persistentId); data.add("id", dataset.getId()); diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 9e805a304a5..a75775810d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -44,6 +44,7 @@ public class DatasetUtil { private static final Logger logger = Logger.getLogger(DatasetUtil.class.getCanonicalName()); + public static final String datasetDefaultSummaryFieldNames = "dsDescription,subject,keyword,publication,notesText"; public static String datasetLogoFilenameFinal = "dataset_logo_original"; public static String datasetLogoThumbnail = "dataset_logo"; public static String thumbExtension = ".thumb"; @@ -429,32 +430,33 @@ public static boolean isDatasetLogoPresent(Dataset dataset, int size) { return false; } - public static List getDatasetSummaryFields(DatasetVersion datasetVersion, String customFields) { - - List datasetFields = new ArrayList<>(); - - //if customFields are empty, go with default fields. 
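// A sketch of the behavior of the rewritten helpers below (illustrative; the "title,author" value is hypothetical):
//   DatasetUtil.getDatasetSummaryFieldNames(null)           -> ["dsDescription", "subject", "keyword", "publication", "notesText"]  (the defaults)
//   DatasetUtil.getDatasetSummaryFieldNames("title,author") -> ["title", "author"]
// These are also the names that the new GET /api/datasets/summaryFieldNames endpoint returns as a JSON array.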
- if(customFields==null || customFields.isEmpty()){ - customFields="dsDescription,subject,keyword,publication,notesText"; - } - - String[] customFieldList= customFields.split(","); - Map DatasetFieldsSet=new HashMap<>(); - + public static List getDatasetSummaryFields(DatasetVersion datasetVersion, String customFieldNames) { + Map datasetFieldsSet = new HashMap<>(); for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) { - DatasetFieldsSet.put(dsf.getDatasetFieldType().getName(),dsf); + datasetFieldsSet.put(dsf.getDatasetFieldType().getName(), dsf); + } + String[] summaryFieldNames = getDatasetSummaryFieldNames(customFieldNames); + List datasetSummaryFields = new ArrayList<>(); + for (String summaryFieldName : summaryFieldNames) { + DatasetField df = datasetFieldsSet.get(summaryFieldName); + if (df != null) { + datasetSummaryFields.add(df); + } } - - for(String cfl : customFieldList) - { - DatasetField df = DatasetFieldsSet.get(cfl); - if(df!=null) - datasetFields.add(df); + return datasetSummaryFields; + } + + public static String[] getDatasetSummaryFieldNames(String customFieldNames) { + String summaryFieldNames; + // If the custom fields are empty, go with the default fields. + if(customFieldNames == null || customFieldNames.isEmpty()){ + summaryFieldNames = datasetDefaultSummaryFieldNames; + } else { + summaryFieldNames = customFieldNames; } - - return datasetFields; + return summaryFieldNames.split(","); } - + public static boolean isRsyncAppropriateStorageDriver(Dataset dataset){ // ToDo - rsync was written before multiple store support and currently is hardcoded to use the DataAccess.S3 store. // When those restrictions are lifted/rsync can be configured per store, this test should check that setting diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index bf831d39965..4c2510b6ccb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -47,6 +47,8 @@ import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; + +import javax.ejb.Asynchronous; import javax.ejb.EJBException; import javax.json.Json; import javax.json.JsonArrayBuilder; @@ -643,7 +645,7 @@ private boolean runAddReplacePhase1(Dataset owner, df.setRootDataFileId(fileToReplace.getRootDataFileId()); } // Reuse any file PID during a replace operation (if File PIDs are in use) - if (systemConfig.isFilePIDsEnabled()) { + if (systemConfig.isFilePIDsEnabledForCollection(owner.getOwner())) { df.setGlobalId(fileToReplace.getGlobalId()); df.setGlobalIdCreateTime(fileToReplace.getGlobalIdCreateTime()); // Should be true or fileToReplace wouldn't have an identifier (since it's not @@ -1928,11 +1930,6 @@ private boolean step_100_startIngestJobs(){ // finalFileList.clear(); - // TODO: Need to run ingwest async...... - //if (true){ - //return true; - //} - if (!multifile) { msg("pre ingest start"); // start the ingest! 
@@ -1941,7 +1938,6 @@ private boolean step_100_startIngestJobs(){ } return true; } - private void msg(String m){ logger.fine(m); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java index d9c5e58d250..eb171160376 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java @@ -139,16 +139,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { //Use for code that requires database ids postDBFlush(theDataset, ctxt); - // TODO: this needs to be moved in to an onSuccess method; not adding to this PR as its out of scope - // TODO: switch to asynchronous version when JPA sync works - // ctxt.index().asyncIndexDataset(theDataset.getId(), true); - try{ - ctxt.index().indexDataset(theDataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post create dataset indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + theDataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, theDataset); - } + ctxt.index().asyncIndexDataset(theDataset, true); return theDataset; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java index 4e3c2835382..bd0a8e0375c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java @@ -150,11 +150,8 @@ protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctx if (!theDataset.isIdentifierRegistered()) { GlobalIdServiceBean globalIdServiceBean = GlobalIdServiceBean.getBean(theDataset.getProtocol(), ctxt); if ( globalIdServiceBean != null ) { - if (globalIdServiceBean instanceof FakePidProviderServiceBean) { - retry=false; //No reason to allow a retry with the FakeProvider, so set false for efficiency - } try { - if (globalIdServiceBean.alreadyExists(theDataset)) { + if (globalIdServiceBean.alreadyRegistered(theDataset)) { int attempts = 0; if(retry) { do { @@ -162,7 +159,7 @@ protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctx logger.log(Level.INFO, "Attempting to register external identifier for dataset {0} (trying: {1}).", new Object[]{theDataset.getId(), theDataset.getIdentifier()}); attempts++; - } while (globalIdServiceBean.alreadyExists(theDataset) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT); + } while (globalIdServiceBean.alreadyRegistered(theDataset) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT); } if(!retry) { logger.warning("Reserving PID for: " + getDataset().getId() + " during publication failed."); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java index 7e2269c375d..1d83f522f29 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java @@ -65,10 +65,11 @@ public 
DatasetVersion execute(CommandContext ctxt) throws CommandException { //good wrapped response if the TOA/Request Access not in compliance prepareDatasetAndVersion(); - // TODO make async - // ctxt.index().indexDataset(dataset); - return ctxt.datasets().storeVersion(newVersion); - + DatasetVersion version = ctxt.datasets().storeVersion(newVersion); + if (ctxt.index() != null) { + ctxt.index().asyncIndexDataset(dataset, true); + } + return version; } /** diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeaccessionDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeaccessionDatasetVersionCommand.java index 3572ac1d0a5..8c643d5cd65 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeaccessionDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeaccessionDatasetVersionCommand.java @@ -106,15 +106,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { DatasetVersion version = (DatasetVersion) r; Dataset dataset = version.getDataset(); - try { - ctxt.index().indexDataset(dataset, true); - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post-publication indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java index 92d5064a4ac..83d0f877d61 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java @@ -204,7 +204,7 @@ public FileVisitResult postVisitDirectory(final Path dir, final IOException e) } GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); try { - if (idServiceBean.alreadyExists(doomed)) { + if (idServiceBean.alreadyRegistered(doomed)) { idServiceBean.deleteIdentifier(doomed); } } catch (Exception e) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java index aada2663bf6..f21a2782609 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetLinkingDataverseCommand.java @@ -66,15 +66,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { Dataset dataset = (Dataset) r; if (index) { - try { - ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post delete linked dataset indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } - + ctxt.index().asyncIndexDataset(dataset, true); } return retVal; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetVersionCommand.java index 3f63c3c6d27..a67d7008ef8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDatasetVersionCommand.java @@ -96,13 +96,7 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { } } boolean doNormalSolrDocCleanUp = true; - try { - ctxt.index().indexDataset(doomed, doNormalSolrDocCleanUp); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post delete version indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + doomed.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, doomed); - } + ctxt.index().asyncIndexDataset(doomed, doNormalSolrDocCleanUp); return; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java index 5da7c6bfec5..41093444360 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java @@ -101,7 +101,7 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { if (!doomed.isHarvested()) { GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); try { - if (idServiceBean.alreadyExists(doomed)) { + if (idServiceBean.alreadyRegistered(doomed)) { idServiceBean.deleteIdentifier(doomed); for (DataFile df : doomed.getFiles()) { idServiceBean.deleteIdentifier(df); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 7ae9145009c..253c761f0c3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -238,14 +238,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { } catch (Exception e) { logger.warning("Failure to send dataset published messages for : " + dataset.getId() + " : " + e.getMessage()); } - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post-publication indexing failed. 
You can kick off a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); //re-indexing dataverses that have additional subjects if (!dataversesToIndex.isEmpty()){ @@ -373,7 +366,7 @@ private void publicizeExternalIdentifier(Dataset dataset, CommandContext ctxt) t String currentGlobalIdProtocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, ""); String currentGlobalAuthority = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, ""); String dataFilePIDFormat = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); - boolean isFilePIDsEnabled = ctxt.systemConfig().isFilePIDsEnabled(); + boolean isFilePIDsEnabled = ctxt.systemConfig().isFilePIDsEnabledForCollection(getDataset().getOwner()); // We will skip trying to register the global identifiers for datafiles // if "dependent" file-level identifiers are requested, AND the naming // protocol, or the authority of the dataset global id is different from diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java index 36d5eaf6f31..478272950bd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java @@ -79,15 +79,10 @@ protected void additionalParameterTests(CommandContext ctxt) throws CommandExcep * that exist (and accessible in the PID provider account configured in * Dataverse) but aren't findable to be used. That could be the case if, for * example, someone was importing a draft dataset from elsewhere. - * - * Also note that just replacing the call above with the alreadyExists() call - * here would break import cases where a DOI is public but not managable with - * the currently configured PID provider credentials. If this is not a valid use - * case, the GET above could be removed. */ GlobalIdServiceBean globalIdServiceBean = GlobalIdServiceBean.getBean(ds.getProtocol(), ctxt); if (globalIdServiceBean != null) { - if (globalIdServiceBean.alreadyExists(ds)) { + if (globalIdServiceBean.alreadyRegistered(ds.getGlobalId(), true)) { return; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java index da70529e76d..aef749d7e26 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/LinkDatasetCommand.java @@ -69,14 +69,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; DatasetLinkingDataverse dld = (DatasetLinkingDataverse) r; - try { - ctxt.index().indexDataset(dld.getDataset(), true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post link dataset indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dld.getDataset().getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dld.getDataset()); - retVal = false; - } + ctxt.index().asyncIndexDataset(dld.getDataset(), true); return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRolesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRolesCommand.java index da9e5adf247..d82b2e7a81d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRolesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRolesCommand.java @@ -27,7 +27,7 @@ public ListRolesCommand(DataverseRequest aRequest, Dataverse aDefinitionPoint) { @Override public Set execute(CommandContext ctxt) throws CommandException { - return definitionPoint.getRoles(); + return ctxt.roles().availableRoles(definitionPoint.getId()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommand.java index 55d02362e88..94bcfa2f5b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDatasetCommand.java @@ -141,15 +141,8 @@ public void executeImpl(CommandContext ctxt) throws CommandException { moved.setOwner(destination); ctxt.em().merge(moved); - try { - boolean doNormalSolrDocCleanUp = true; - ctxt.index().indexDataset(moved, doNormalSolrDocCleanUp); - - } catch (Exception e) { // RuntimeException e ) { - logger.log(Level.WARNING, "Exception while indexing:" + e.getMessage()); //, e); - throw new CommandException(BundleUtil.getStringFromBundle("dashboard.card.datamove.dataset.command.error.indexingProblem"), this); - - } + boolean doNormalSolrDocCleanUp = true; + ctxt.index().asyncIndexDataset(moved, doNormalSolrDocCleanUp); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommand.java index dcae4e039e6..ea38f5a7af7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MoveDataverseCommand.java @@ -302,14 +302,7 @@ public void executeImpl(CommandContext ctxt) throws CommandException { if (moved.getDatasetLinkingDataverses() != null && !moved.getDatasetLinkingDataverses().isEmpty()) { for (DatasetLinkingDataverse dld : moved.getDatasetLinkingDataverses()) { Dataset linkedDS = ctxt.datasets().find(dld.getDataset().getId()); - try { - ctxt.index().indexDataset(linkedDS, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post move dataverse dataset indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + linkedDS.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, linkedDS); - - } + ctxt.index().asyncIndexDataset(linkedDS, true); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java index 5e29a21b6a1..f5ef121dee2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java @@ -135,7 +135,7 @@ public PublishDatasetResult execute(CommandContext ctxt) throws CommandException String dataFilePIDFormat = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); boolean registerGlobalIdsForFiles = (currentGlobalIdProtocol.equals(theDataset.getProtocol()) || dataFilePIDFormat.equals("INDEPENDENT")) - && ctxt.systemConfig().isFilePIDsEnabled(); + && ctxt.systemConfig().isFilePIDsEnabledForCollection(theDataset.getOwner()); if ( registerGlobalIdsForFiles ){ registerGlobalIdsForFiles = currentGlobalAuthority.equals( theDataset.getAuthority() ); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java index 564c4a1f6b5..bdb6ceffd6d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RedetectFileTypeCommand.java @@ -83,12 +83,8 @@ public DataFile execute(CommandContext ctxt) throws CommandException { throw new CommandException("Exception while attempting to save the new file type: " + EjbUtil.ejbExceptionToString(ex), this); } Dataset dataset = fileToRedetect.getOwner(); - try { - boolean doNormalSolrDocCleanUp = true; - ctxt.index().indexDataset(dataset, doNormalSolrDocCleanUp); - } catch (Exception ex) { - logger.info("Exception while reindexing files during file type redetection: " + ex.getLocalizedMessage()); - } + boolean doNormalSolrDocCleanUp = true; + ctxt.index().asyncIndexDataset(dataset, doNormalSolrDocCleanUp); try { ExportService instance = ExportService.getInstance(); instance.exportAllFormats(dataset); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java index 299d1a925f4..779bc7fb7fe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java @@ -70,7 +70,7 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { target.setAuthority(authority); } } - if (idServiceBean.alreadyExists(target)) { + if (idServiceBean.alreadyRegistered(target)) { return; } String doiRetString = idServiceBean.createIdentifier(target); @@ -137,15 +137,8 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { //Only continue if you can successfully migrate the handle boolean doNormalSolrDocCleanUp = true; Dataset dataset = (Dataset) target; - try { - ctxt.index().indexDataset(dataset, doNormalSolrDocCleanUp); - ctxt.solrIndex().indexPermissionsForOneDvObject( dataset); - } catch 
(IOException | SolrServerException e) { - String failureLogText = "Post migrate handle dataset indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - - } + ctxt.index().asyncIndexDataset(dataset, doNormalSolrDocCleanUp); + ctxt.solrIndex().indexPermissionsForOneDvObject( dataset); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java index ba0348f57d6..caf37ad4de1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java @@ -72,14 +72,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; Dataset dataset = (Dataset) r; - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post return to author indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java index 72f0ef335fb..557f9dff622 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetCurationStatusCommand.java @@ -101,14 +101,7 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; Dataset dataset = (Dataset) r; - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post submit for review indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetDatasetCitationDateCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetDatasetCitationDateCommand.java index fe14d56562d..2cae9e51896 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetDatasetCitationDateCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SetDatasetCitationDateCommand.java @@ -49,14 +49,8 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; Dataset dataset = (Dataset) r; - try { - Future indexString = ctxt.index().indexDataset(dataset, false); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post set dataset citation date indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, false); + return retVal; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java index 130030798ab..77a4bf5b8ba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/SubmitDatasetForReviewCommand.java @@ -75,14 +75,8 @@ public boolean onSuccess(CommandContext ctxt, Object r) { boolean retVal = true; Dataset dataset = (Dataset) r; - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post submit for review indexing failed. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } + ctxt.index().asyncIndexDataset(dataset, true); + return retVal; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 33f64f23076..12a5d8611f4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -3,7 +3,6 @@ import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; @@ -12,17 +11,13 @@ import edu.harvard.iq.dataverse.util.DatasetFieldUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; -import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.concurrent.Future; import java.util.logging.Level; import java.util.logging.Logger; import javax.validation.ConstraintViolationException; -import org.apache.solr.client.solrj.SolrServerException; - /** * * @author skraffmiller @@ -101,7 +96,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { } Dataset theDataset = getDataset(); - ctxt.permissions().checkEditDatasetLock(theDataset, getRequest(), this); + ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); Dataset savedDataset = null; try { @@ -270,21 +265,12 @@ public Dataset execute(CommandContext ctxt) throws CommandException { @Override public boolean onSuccess(CommandContext ctxt, Object r) { - - boolean retVal = true; - Dataset dataset = (Dataset) r; - - try { - Future indexString = ctxt.index().indexDataset(dataset, true); - } catch (IOException | SolrServerException e) { - String failureLogText = "Post update dataset indexing failed. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); - failureLogText += "\r\n" + e.getLocalizedMessage(); - LoggingUtil.writeOnSuccessFailureLog(this, failureLogText, dataset); - retVal = false; - } - - return retVal; - + // Async indexing significantly improves performance when updating datasets with thousands of files + // Indexing will be started immediately, unless an index is already busy for the given data + // (it will be scheduled then for later indexing of the newest version). + // See the documentation of asyncIndexDataset method for more details. + ctxt.index().asyncIndexDataset((Dataset) r, true); + return true; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java index 57a3394ff77..218b0ea89d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java @@ -113,13 +113,9 @@ public boolean onSuccess(CommandContext ctxt, Object r) { // first kick of async index of datasets // TODO: is this actually needed? Is there a better way to handle - try { - Dataverse result = (Dataverse) r; - List datasets = ctxt.datasets().findByOwnerId(result.getId()); - ctxt.index().asyncIndexDatasetList(datasets, true); - } catch (IOException | SolrServerException e) { - // these datasets are being indexed asynchrounously, so not sure how to handle errors here - } + Dataverse result = (Dataverse) r; + List datasets = ctxt.datasets().findByOwnerId(result.getId()); + ctxt.index().asyncIndexDatasetList(datasets, true); return ctxt.dataverses().index((Dataverse) r); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java index 7e37241563c..7230f9f9c0a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java @@ -57,7 +57,7 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { // didn't need updating. String currentGlobalIdProtocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, ""); String dataFilePIDFormat = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); - boolean isFilePIDsEnabled = ctxt.systemConfig().isFilePIDsEnabled(); + boolean isFilePIDsEnabled = ctxt.systemConfig().isFilePIDsEnabledForCollection(target.getOwner()); // We will skip trying to update the global identifiers for datafiles if they // aren't being used. 
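Editorial aside, not part of the patch: the isFilePIDsEnabledForCollection(...) calls introduced in these commands (and defined in SystemConfig later in this diff) resolve the file PID setting per collection, where the closest ancestor with an explicit value wins and the instance-wide :FilePIDsEnabled setting is only consulted at the root. A minimal, self-contained sketch of that lookup; the Node type, field names, and main() values are invented for illustration and are not part of the Dataverse codebase:

public class FilePidInheritanceSketch {

    // Hypothetical stand-in for a Dataverse collection: a parent pointer plus a nullable override.
    static class Node {
        final Node owner;                 // parent collection; null at the root
        final Boolean filePIDsEnabled;    // null means "not set here, inherit from the parent"

        Node(Node owner, Boolean filePIDsEnabled) {
            this.owner = owner;
            this.filePIDsEnabled = filePIDsEnabled;
        }
    }

    // Walk up the owner chain; the first explicit setting takes precedence,
    // otherwise fall back to the instance-wide default (the :FilePIDsEnabled setting).
    static boolean isFilePIDsEnabledFor(Node collection, boolean instanceWideDefault) {
        for (Node c = collection; c != null; c = c.owner) {
            if (c.filePIDsEnabled != null) {
                return c.filePIDsEnabled;
            }
        }
        return instanceWideDefault;
    }

    public static void main(String[] args) {
        Node root = new Node(null, null);
        Node parent = new Node(root, Boolean.FALSE);  // file PIDs explicitly disabled here
        Node child = new Node(parent, null);          // no explicit setting, inherits

        System.out.println(isFilePIDsEnabledFor(child, true));   // false, inherited from parent
        System.out.println(isFilePIDsEnabledFor(root, true));    // true, instance-wide default applies
    }
}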
// If they are, we need to assure that there's an existing PID or, as when diff --git a/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java b/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java index efa2c0c9300..7c76c4972a8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java @@ -77,7 +77,7 @@ public JsonArray getDatasetFileDetails() { JsonArrayBuilder jab = Json.createArrayBuilder(); for (FileMetadata fileMetadata : dv.getFileMetadatas()) { DataFile dataFile = fileMetadata.getDataFile(); - jab.add(JsonPrinter.json(dataFile, fileMetadata)); + jab.add(JsonPrinter.json(dataFile, fileMetadata, true)); } return jab.build(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 1886ebec5a4..73ba2d204ce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -1712,7 +1712,7 @@ public static void createDataDscr(XMLStreamWriter xmlw, JsonArray fileDetails) t // we're not writing the opening tag until we find an actual // tabular datafile. for (int i=0;i sumStat : dvar.getJsonObject("summaryStatistics").entrySet()) { - xmlw.writeStartElement("sumStat"); - writeAttribute(xmlw, "type", sumStat.getKey()); - xmlw.writeCharacters(sumStat.getValue().toString()); - xmlw.writeEndElement(); //sumStat + // sum stats + if (dvar.containsKey("summaryStatistics")) { + for (Entry sumStat : dvar.getJsonObject("summaryStatistics").entrySet()) { + xmlw.writeStartElement("sumStat"); + writeAttribute(xmlw, "type", sumStat.getKey()); + xmlw.writeCharacters(sumStat.getValue().toString()); + xmlw.writeEndElement(); // sumStat + } } // categories - JsonArray varCats = dvar.getJsonArray("variableCategories"); - for (int i=0;i(); } - public IngestMessage(int messageLevel, Long authenticatedUserId) { - this.messageLevel = messageLevel; + public IngestMessage(Long authenticatedUserId) { this.authenticatedUserId = authenticatedUserId; datafile_ids = new ArrayList(); } - - private int messageLevel = INGEST_MESAGE_LEVEL_INFO; private Long datasetId; - private Long datasetVersionId; - private String versionNote; - private String datasetVersionNumber; private List datafile_ids; private Long authenticatedUserId; - - public String getVersionNote() { - return versionNote; - } - - public void setVersionNote(String versionNote) { - this.versionNote = versionNote; - } - - public int getMessageLevel() { - return messageLevel; - } - - public void setMessageLevel(int messageLevel) { - this.messageLevel = messageLevel; - } + private String info; public Long getDatasetId() { return datasetId; @@ -83,30 +54,6 @@ public Long getDatasetId() { public void setDatasetId(Long datasetId) { this.datasetId = datasetId; } - - public Long getDatasetVersionId() { - return datasetVersionId; - } - - public void setDatasetVersionId(Long datasetVersionId) { - this.datasetVersionId = datasetVersionId; - } - - public boolean sendInfoMessage() { - return messageLevel >= INGEST_MESAGE_LEVEL_INFO; - } - - public boolean sendErrorMessage() { - return messageLevel >= INGEST_MESAGE_LEVEL_ERROR; - } - - public String getDatasetVersionNumber() { - return datasetVersionNumber; - } - - public void setDatasetVersionNumber(String datasetVersionNumber) { - this.datasetVersionNumber = 
datasetVersionNumber; - } public List getFileIds() { return datafile_ids; @@ -123,4 +70,12 @@ public void addFileId(Long file_id) { public Long getAuthenticatedUserId() { return authenticatedUserId; } + + public void setInfo(String info) { + this.info = info; + } + + public String getInfo() { + return info; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java index 1fa94009f7a..77ec6701bc6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestMessageBean.java @@ -38,7 +38,6 @@ import javax.jms.Message; import javax.jms.MessageListener; import javax.jms.ObjectMessage; - /** * * This is an experimental, JMS-based implementation of asynchronous * @@ -53,6 +52,7 @@ @ActivationConfigProperty(propertyName = "destinationType", propertyValue = "javax.jms.Queue") } ) + public class IngestMessageBean implements MessageListener { private static final Logger logger = Logger.getLogger(IngestMessageBean.class.getCanonicalName()); @EJB DatasetServiceBean datasetService; @@ -69,17 +69,27 @@ public IngestMessageBean() { public void onMessage(Message message) { IngestMessage ingestMessage = null; - Long datafile_id = null; AuthenticatedUser authenticatedUser = null; try { ObjectMessage om = (ObjectMessage) message; ingestMessage = (IngestMessage) om.getObject(); + // if the lock was removed while an ingest was queued, retake the lock + // The "if" is the first thing that the addDatasetLock method does. + // It has some complexity and would result in code duplication if repeated here. + // If that check were removed from the addDatasetLock method in the future without + // updating the code using this method, ingest code would still not break because + // we remove "all" ingest locks at the end (right now, there can be at most one ingest lock). + datasetService.addDatasetLock(ingestMessage.getDatasetId(), + DatasetLock.Reason.Ingest, + ingestMessage.getAuthenticatedUserId(), + ingestMessage.getInfo()); + authenticatedUser = authenticationServiceBean.findByID(ingestMessage.getAuthenticatedUserId()); - Iterator iter = ingestMessage.getFileIds().iterator(); - datafile_id = null; + Iterator iter = ingestMessage.getFileIds().iterator(); + Long datafile_id = null; boolean ingestWithErrors = false; @@ -87,7 +97,7 @@ public void onMessage(Message message) { sbIngestedFiles.append("
    "); while (iter.hasNext()) { - datafile_id = (Long) iter.next(); + datafile_id = iter.next(); logger.fine("Start ingest job;"); try { @@ -139,11 +149,10 @@ public void onMessage(Message message) { logger.info("trying to save datafile and the failed ingest report, id=" + datafile_id); datafile = datafileService.save(datafile); - Dataset dataset = datafile.getOwner(); - if (dataset != null && dataset.getId() != null) { + if (ingestMessage.getDatasetId() != null) { //logger.info("attempting to remove dataset lock for dataset " + dataset.getId()); //datasetService.removeDatasetLock(dataset.getId()); - ingestService.sendFailNotification(dataset.getId()); + ingestService.sendFailNotification(ingestMessage.getDatasetId()); } } } @@ -152,27 +161,11 @@ public void onMessage(Message message) { sbIngestedFiles.append("
"); - Long objectId = null; - - // Remove the dataset lock: - // (note that the assumption here is that all of the datafiles - // packed into this IngestMessage belong to the same dataset) - if (datafile_id != null) { - DataFile datafile = datafileService.find(datafile_id); - if (datafile != null) { - Dataset dataset = datafile.getOwner(); - objectId = dataset.getId(); - if (dataset != null && dataset.getId() != null) { - datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.Ingest); - } - } - } - userNotificationService.sendNotification( authenticatedUser, Timestamp.from(Instant.now()), !ingestWithErrors ? UserNotification.Type.INGESTCOMPLETED : UserNotification.Type.INGESTCOMPLETEDWITHERRORS, - objectId, + ingestMessage.getDatasetId(), sbIngestedFiles.toString(), true ); @@ -182,9 +175,15 @@ public void onMessage(Message message) { ex.printStackTrace(); // error in getting object from message; can't send e-mail } finally { - // when we're done, go ahead and remove the lock (not yet) + // when we're done, go ahead and remove the lock try { - //datasetService.removeDatasetLock( ingestMessage.getDatasetId() ); + // Remove the dataset lock: + // (note that the assumption here is that all of the datafiles + // packed into this IngestMessage belong to the same dataset) + Dataset dataset = datasetService.find(ingestMessage.getDatasetId()); + if (dataset != null && dataset.getId() != null) { + datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.Ingest); + } } catch (Exception ex) { ex.printStackTrace(); // application was unable to remove the datasetLock } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 9be24530615..c1e82042898 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -485,15 +485,17 @@ public void startIngestJobsForDataset(Dataset dataset, AuthenticatedUser user) { // todo: investigate why when calling save with the file object // gotten from the loop, the roles assignment added at create is removed // (switching to refinding via id resolves that) + // possible explanation: when flush-mode is auto, flush is on query, + // we make sure that the roles assignment added at create is flushed dataFile = fileService.find(dataFile.getId()); scheduledFiles.add(dataFile); } } - startIngestJobs(scheduledFiles, user); + startIngestJobs(dataset.getId(), scheduledFiles, user); } - public String startIngestJobs(List dataFiles, AuthenticatedUser user) { + public String startIngestJobs(Long datasetId, List dataFiles, AuthenticatedUser user) { IngestMessage ingestMessage = null; StringBuilder sb = new StringBuilder(); @@ -534,7 +536,7 @@ public String startIngestJobs(List dataFiles, AuthenticatedUser user) if (count > 0) { String info = "Ingest of " + count + " tabular data file(s) is in progress."; logger.info(info); - datasetService.addDatasetLock(scheduledFiles.get(0).getOwner().getId(), + datasetService.addDatasetLock(datasetId, DatasetLock.Reason.Ingest, (user != null) ? 
user.getId() : null, info); @@ -552,10 +554,12 @@ public int compare(DataFile d1, DataFile d2) { } }); - ingestMessage = new IngestMessage(IngestMessage.INGEST_MESAGE_LEVEL_INFO, user.getId()); + ingestMessage = new IngestMessage(user.getId()); for (int i = 0; i < count; i++) { ingestMessage.addFileId(scheduledFilesArray[i].getId()); } + ingestMessage.setDatasetId(datasetId); + ingestMessage.setInfo(info); QueueConnection conn = null; QueueSession session = null; diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java index 68dd853d4de..54d64710511 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java @@ -20,7 +20,7 @@ public class FakePidProviderServiceBean extends DOIServiceBean { //Only need to check locally public boolean isGlobalIdUnique(GlobalId globalId) { try { - return ! alreadyExists(globalId); + return ! alreadyRegistered(globalId, false); } catch (Exception e){ //we can live with failure - means identifier not found remotely } @@ -28,8 +28,9 @@ public boolean isGlobalIdUnique(GlobalId globalId) { } @Override - public boolean alreadyExists(GlobalId globalId) throws Exception { - return ! dvObjectService.isGlobalIdLocallyUnique(globalId); + public boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) { + boolean existsLocally = !dvObjectService.isGlobalIdLocallyUnique(globalId); + return existsLocally ? existsLocally : noProviderDefault; } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PermaLinkPidProviderServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PermaLinkPidProviderServiceBean.java index 957522b7728..f387188b690 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PermaLinkPidProviderServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PermaLinkPidProviderServiceBean.java @@ -69,10 +69,13 @@ public String getSeparator() { } @Override - public boolean alreadyExists(GlobalId globalId) throws Exception { - return ! dvObjectService.isGlobalIdLocallyUnique(globalId); + public boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) { + // Perma doesn't manage registration, so we assume all local PIDs can be treated + // as registered + boolean existsLocally = !dvObjectService.isGlobalIdLocallyUnique(globalId); + return existsLocally ? 
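Editorial aside, not part of the patch: with the ingest changes above, the Ingest lock has a clear lifecycle, it is added when the jobs are queued in startIngestJobs, retaken idempotently at the start of onMessage (in case it was cleared while the message waited in the queue), and removed for the whole dataset in the finally block. A self-contained sketch of that take/retake/release pattern; the class and method names are invented for illustration and only the general shape mirrors the patch:

import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

public class IngestLockSketch {

    enum Reason { Ingest }

    // datasetId -> set of lock reasons currently held (stand-in for DatasetServiceBean bookkeeping)
    private final Map<Long, Set<Reason>> locks = new ConcurrentHashMap<>();

    // Idempotent: adding an Ingest lock that is already present is a no-op,
    // which is why the consumer can safely "retake" it at the start of message processing.
    void addDatasetLock(long datasetId, Reason reason) {
        locks.computeIfAbsent(datasetId, id -> ConcurrentHashMap.newKeySet()).add(reason);
    }

    // Removes every lock with the given reason, mirroring removeDatasetLocks(dataset, Reason.Ingest).
    void removeDatasetLocks(long datasetId, Reason reason) {
        locks.computeIfPresent(datasetId, (id, set) -> {
            set.remove(reason);
            return set.isEmpty() ? null : set;
        });
    }

    void processQueuedIngest(long datasetId, Runnable ingestJob) {
        addDatasetLock(datasetId, Reason.Ingest); // retake, in case the lock was cleared while queued
        try {
            ingestJob.run();
        } finally {
            removeDatasetLocks(datasetId, Reason.Ingest); // always release at the end
        }
    }

    public static void main(String[] args) {
        IngestLockSketch sketch = new IngestLockSketch();
        sketch.addDatasetLock(42L, Reason.Ingest);          // taken when the job is queued
        sketch.processQueuedIngest(42L, () -> System.out.println("ingesting files for dataset 42"));
        System.out.println(sketch.locks.containsKey(42L));  // false: lock released in the finally block
    }
}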
existsLocally : noProviderDefault; } - + @Override public boolean registerWhenPublished() { return false; diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedDOIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedDOIServiceBean.java index 088992fd3ec..20f1051763f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedDOIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedDOIServiceBean.java @@ -43,8 +43,8 @@ public boolean registerWhenPublished() { } @Override - public boolean alreadyExists(GlobalId pid) { - return false; + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) { + throw new NotImplementedException(); } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedHandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedHandlenetServiceBean.java index c467b8672ee..8847a99bd20 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedHandlenetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedHandlenetServiceBean.java @@ -35,7 +35,7 @@ public boolean registerWhenPublished() { } @Override - public boolean alreadyExists(GlobalId pid) throws Exception { + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) throws Exception { throw new NotImplementedException(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlServiceBean.java index efe64052c4a..8eb0dfe4ebd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlServiceBean.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.RoleAssignment; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -61,6 +62,13 @@ public PrivateUrlRedirectData getPrivateUrlRedirectDataFromToken(String token) { return PrivateUrlUtil.getPrivateUrlRedirectData(getRoleAssignmentFromPrivateUrlToken(token)); } + /** + * @return DatasetVersion if it can be found using the token or null. + */ + public DatasetVersion getDraftDatasetVersionFromToken(String token) { + return PrivateUrlUtil.getDraftDatasetVersionFromRoleAssignment(getRoleAssignmentFromPrivateUrlToken(token)); + } + /** * @return A RoleAssignment or null. 
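Editorial aside, not part of the patch: the alreadyExists(...) checks replaced throughout this diff become alreadyRegistered(GlobalId, boolean noProviderDefault). Providers that only know about local PIDs (the Fake and PermaLink beans above) report a PID as registered if it exists locally and otherwise fall back to the caller-supplied default, which is how ImportDatasetCommand can pass true and skip re-registration when in doubt. A self-contained sketch of that contract; the PidProvider interface and String-based PIDs are simplifications invented for this example:

import java.util.Set;

public class AlreadyRegisteredSketch {

    interface PidProvider {
        boolean alreadyRegistered(String pid, boolean noProviderDefault);
    }

    // Mirrors the Fake/PermaLink behavior in this diff: a PID counts as registered if it is
    // known locally; otherwise the caller-supplied default decides.
    static class LocalOnlyProvider implements PidProvider {
        private final Set<String> locallyKnown;

        LocalOnlyProvider(Set<String> locallyKnown) {
            this.locallyKnown = locallyKnown;
        }

        @Override
        public boolean alreadyRegistered(String pid, boolean noProviderDefault) {
            boolean existsLocally = locallyKnown.contains(pid);
            return existsLocally ? existsLocally : noProviderDefault;
        }
    }

    public static void main(String[] args) {
        PidProvider provider = new LocalOnlyProvider(Set.of("doi:10.5072/FK2/LOCAL1"));

        // Publication-style check: an unknown PID should be treated as NOT yet registered.
        System.out.println(provider.alreadyRegistered("doi:10.5072/FK2/NEW", false)); // false

        // Import-style check (compare the true passed in ImportDatasetCommand): when the provider
        // cannot rule it out, err on the side of "already registered" and skip re-registration.
        System.out.println(provider.alreadyRegistered("doi:10.5072/FK2/NEW", true));  // true
    }
}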
* diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexBatchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexBatchServiceBean.java index 34c145fa6e8..932f58d875d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexBatchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexBatchServiceBean.java @@ -205,15 +205,9 @@ public Future indexAllOrSubset(long numPartitions, long partitionId, boo int datasetFailureCount = 0; List datasetIds = datasetService.findAllOrSubsetOrderByFilesOwned(skipIndexed); for (Long id : datasetIds) { - try { - datasetIndexCount++; - logger.info("indexing dataset " + datasetIndexCount + " of " + datasetIds.size() + " (id=" + id + ")"); - Future result = indexService.indexDatasetInNewTransaction(id); - } catch (Exception e) { - //We want to keep running even after an exception so throw some more info into the log - datasetFailureCount++; - logger.info("FAILURE indexing dataset " + datasetIndexCount + " of " + datasetIds.size() + " (id=" + id + ") Exception info: " + e.getMessage()); - } + datasetIndexCount++; + logger.info("indexing dataset " + datasetIndexCount + " of " + datasetIds.size() + " (id=" + id + ")"); + indexService.indexDatasetInNewTransaction(id); } logger.info("done iterating through all datasets"); @@ -269,15 +263,9 @@ public void indexDataverseRecursively(Dataverse dataverse) { // index the Dataset children for (Long childId : datasetChildren) { - try { - datasetIndexCount++; - logger.info("indexing dataset " + datasetIndexCount + " of " + datasetChildren.size() + " (id=" + childId + ")"); - indexService.indexDatasetInNewTransaction(childId); - } catch (Exception e) { - //We want to keep running even after an exception so throw some more info into the log - datasetFailureCount++; - logger.info("FAILURE indexing dataset " + datasetIndexCount + " of " + datasetChildren.size() + " (id=" + childId + ") Exception info: " + e.getMessage()); - } + datasetIndexCount++; + logger.info("indexing dataset " + datasetIndexCount + " of " + datasetChildren.size() + " (id=" + childId + ")"); + indexService.indexDatasetInNewTransaction(childId); } long end = System.currentTimeMillis(); if (datasetFailureCount + dataverseFailureCount > 0){ diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 2d0bf8f467c..0b8f93e47a9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -24,6 +24,7 @@ import edu.harvard.iq.dataverse.PermissionServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; +import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.DataAccessRequest; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -55,6 +56,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Future; import java.util.function.Function; import java.util.logging.Logger; @@ -241,7 +243,7 @@ public Future indexDataverse(Dataverse dataverse, boolean processPaths) solrInputDocument.addField(SearchFields.SOURCE, HARVESTED); } else { (this means that all dataverses are "local" - should this be removed? 
*/ solrInputDocument.addField(SearchFields.IS_HARVESTED, false); - solrInputDocument.addField(SearchFields.METADATA_SOURCE, findRootDataverseCached().getName()); //rootDataverseName); + solrInputDocument.addField(SearchFields.METADATA_SOURCE, rootDataverse.getName()); //rootDataverseName); /*}*/ addDataverseReleaseDateToSolrDoc(solrInputDocument, dataverse); @@ -348,48 +350,98 @@ public Future indexDataverse(Dataverse dataverse, boolean processPaths) } @TransactionAttribute(REQUIRES_NEW) - public Future indexDatasetInNewTransaction(Long datasetId) throws SolrServerException, IOException{ //Dataset dataset) { + public void indexDatasetInNewTransaction(Long datasetId) { //Dataset dataset) { boolean doNormalSolrDocCleanUp = false; - Dataset dataset = em.find(Dataset.class, datasetId); - // return indexDataset(dataset, doNormalSolrDocCleanUp); - Future ret = indexDataset(dataset, doNormalSolrDocCleanUp); + Dataset dataset = datasetService.findDeep(datasetId); + asyncIndexDataset(dataset, doNormalSolrDocCleanUp); dataset = null; - return ret; } - @TransactionAttribute(REQUIRES_NEW) - public Future indexDatasetObjectInNewTransaction(Dataset dataset) throws SolrServerException, IOException{ //Dataset dataset) { - boolean doNormalSolrDocCleanUp = false; - // return indexDataset(dataset, doNormalSolrDocCleanUp); - Future ret = indexDataset(dataset, doNormalSolrDocCleanUp); - dataset = null; - return ret; + // The following two variables are only used in the synchronized getNextToIndex method and do not need to be synchronized themselves + + // nextToIndex contains datasets mapped by dataset id that were added for future indexing while the indexing was already ongoing for a given dataset + // (if there already was a dataset scheduled for indexing, it is overwritten and only the most recently requested version is kept in the map) + private static final Map NEXT_TO_INDEX = new ConcurrentHashMap<>(); + // indexingNow is a set of dataset ids of datasets being indexed asynchronously right now + private static final Map INDEXING_NOW = new ConcurrentHashMap<>(); + + // When you pass null as Dataset parameter to this method, it indicates that the indexing of the dataset with "id" has finished + // Pass non-null Dataset to schedule it for indexing + synchronized private static Dataset getNextToIndex(Long id, Dataset d) { + if (d == null) { // -> indexing of the dataset with id has finished + Dataset next = NEXT_TO_INDEX.remove(id); + if (next == null) { // -> no new indexing jobs were requested while indexing was ongoing + // the job can be stopped now + INDEXING_NOW.remove(id); + } + return next; + } + // index job is requested for a non-null dataset + if (INDEXING_NOW.containsKey(id)) { // -> indexing job is already ongoing, and a new job should not be started by the current thread -> return null + NEXT_TO_INDEX.put(id, d); + return null; + } + // otherwise, start a new job + INDEXING_NOW.put(id, true); + return d; } + /** + * Indexes a dataset asynchronously. + * + * Note that this method implement a synchronized skipping mechanism. When an + * indexing job is already running for a given dataset in the background, the + * new call will not index that dataset, but will delegate the execution to + * the already running job. The running job will pick up the requested indexing + * once that it is finished with the ongoing indexing. If another indexing is + * requested before the ongoing indexing is finished, only the indexing that is + * requested most recently will be picked up for the next indexing. 
+ * + * In other words: we can have at most one indexing ongoing for the given + * dataset, and at most one (most recent) request for reindexing of the same + * dataset. All requests that come between the most recent one and the ongoing + * one are skipped for optimization reasons. For a more in-depth discussion, + * see the pull request: https://github.com/IQSS/dataverse/pull/9558 + * + * @param dataset The dataset to be indexed. + * @param doNormalSolrDocCleanUp Flag for normal Solr doc clean up. + */ @Asynchronous - public Future asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { - return indexDataset(dataset, doNormalSolrDocCleanUp); + public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { + Long id = dataset.getId(); + Dataset next = getNextToIndex(id, dataset); // if there is an ongoing index job for this dataset, next is null (ongoing index job will reindex the newest version after current indexing finishes) + while (next != null) { + try { + indexDataset(next, doNormalSolrDocCleanUp); + } catch (Exception e) { // catch all possible exceptions; otherwise when something unexpected happens the dataset would remain locked and impossible to reindex + String failureLogText = "Indexing failed. You can kick off a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); + failureLogText += "\r\n" + e.getLocalizedMessage(); + LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); + } + next = getNextToIndex(id, null); // if dataset was not changed during the indexing (and no new job was requested), next is null and loop can be stopped + } } - - @Asynchronous - public void asyncIndexDatasetList(List datasets, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + + public void asyncIndexDatasetList(List datasets, boolean doNormalSolrDocCleanUp) { for(Dataset dataset : datasets) { - indexDataset(dataset, true); + asyncIndexDataset(dataset, true); } } - public Future indexDvObject(DvObject objectIn) throws SolrServerException, IOException { - + public void indexDvObject(DvObject objectIn) throws SolrServerException, IOException { if (objectIn.isInstanceofDataset() ){ - return (indexDataset((Dataset)objectIn, true)); + asyncIndexDataset((Dataset)objectIn, true); + } else if (objectIn.isInstanceofDataverse() ){ + indexDataverse((Dataverse)objectIn); } - if (objectIn.isInstanceofDataverse() ){ - return (indexDataverse((Dataverse)objectIn)); - } - return null; + } + + private void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + doIndexDataset(dataset, doNormalSolrDocCleanUp); + updateLastIndexedTime(dataset.getId()); } - public Future indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { logger.fine("indexing dataset " + dataset.getId()); /** * @todo should we use solrDocIdentifierDataset or
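Editorial aside, not part of the patch: the NEXT_TO_INDEX / INDEXING_NOW bookkeeping and the loop in asyncIndexDataset amount to a per-dataset coalescing scheduler, one job runs at a time for a given id, and while it runs only the most recent re-index request is kept. The same pattern modeled in a self-contained way with plain Runnable tasks; CoalescingIndexSketch and its method names are invented for illustration and are not the Dataverse API:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class CoalescingIndexSketch {

    private final Map<Long, Runnable> nextToRun = new ConcurrentHashMap<>();
    private final Map<Long, Boolean> runningNow = new ConcurrentHashMap<>();

    // Pass a non-null task to request work; pass null to signal that the current run finished.
    // Returns the task the calling thread should run next, or null if it should stop or defer.
    private synchronized Runnable next(Long id, Runnable task) {
        if (task == null) {                      // the current run for this id just finished
            Runnable queued = nextToRun.remove(id);
            if (queued == null) {
                runningNow.remove(id);           // nothing pending: release the "running" marker
            }
            return queued;
        }
        if (runningNow.containsKey(id)) {        // a run is in progress: keep only the newest request
            nextToRun.put(id, task);
            return null;
        }
        runningNow.put(id, Boolean.TRUE);        // no run in progress: this thread takes the job
        return task;
    }

    public void submit(Long id, Runnable task) {
        Runnable current = next(id, task);
        while (current != null) {
            try {
                current.run();
            } catch (Exception e) {
                System.err.println("task for " + id + " failed: " + e); // keep the loop alive
            }
            current = next(id, null);            // pick up the most recent pending request, if any
        }
    }

    public static void main(String[] args) {
        CoalescingIndexSketch sketch = new CoalescingIndexSketch();
        sketch.submit(1L, () -> System.out.println("index dataset 1, version A"));
        sketch.submit(1L, () -> System.out.println("index dataset 1, version B"));
    }
}

In the patch the "task" is the dataset to be indexed and the loop body is indexDataset(next, doNormalSolrDocCleanUp), but the control flow is the same: concurrent requests for the same dataset collapse to the latest one.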
desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, true); @@ -595,11 +646,9 @@ public Future indexDataset(Dataset dataset, boolean doNormalSolrDocClean String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); logger.fine(result); indexDatasetPermissions(dataset); - return new AsyncResult<>(result); } else { String result = "No-op. Unexpected condition reached: No released version and latest version is neither draft nor deaccessioned"; logger.fine(result); - return new AsyncResult<>(result); } } else if (atLeastOnePublishedVersion == true) { results.append("Published versions found. ") @@ -652,7 +701,6 @@ public Future indexDataset(Dataset dataset, boolean doNormalSolrDocClean String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); logger.fine(result); indexDatasetPermissions(dataset); - return new AsyncResult<>(result); } else if (latestVersionState.equals(DatasetVersion.VersionState.DRAFT)) { IndexableDataset indexableDraftVersion = new IndexableDataset(latestVersion); @@ -706,16 +754,13 @@ public Future indexDataset(Dataset dataset, boolean doNormalSolrDocClean String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); logger.fine(result); indexDatasetPermissions(dataset); - return new AsyncResult<>(result); } else { String result = "No-op. Unexpected condition reached: There is at least one published version but the latest version is neither published nor draft"; logger.fine(result); - return new AsyncResult<>(result); } } else { String result = "No-op. Unexpected condition reached: Has a version been published or not?"; logger.fine(result); - return new AsyncResult<>(result); } } @@ -741,10 +786,11 @@ private IndexResponse indexDatasetPermissions(Dataset dataset) { } private String addOrUpdateDataset(IndexableDataset indexableDataset) throws SolrServerException, IOException { - return addOrUpdateDataset(indexableDataset, null); + String result = addOrUpdateDataset(indexableDataset, null); + return result; } - public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set datafilesInDraftVersion) throws SolrServerException, IOException { + public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set datafilesInDraftVersion) throws SolrServerException, IOException { IndexableDataset.DatasetState state = indexableDataset.getDatasetState(); Dataset dataset = indexableDataset.getDatasetVersion().getDataset(); logger.fine("adding or updating Solr document for dataset id " + dataset.getId()); @@ -771,10 +817,15 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set d throw new IOException(ex); } } + return docs.getMessage(); + } + + @Asynchronous + private void updateLastIndexedTime(Long id) { + // indexing is often in a transaction with update statements + // if we flush on query (flush-mode auto), we want to prevent locking + // -> update the dataset asynchronously in a new transaction + updateLastIndexedTimeInNewTransaction(id); + } + + @TransactionAttribute(REQUIRES_NEW) + private void updateLastIndexedTimeInNewTransaction(Long id) { /// Dataset updatedDataset = /// (Dataset)dvObjectService.updateContentIndexTime(dataset); /// updatedDataset = null; // instead of making a call to dvObjectService, let's try and // modify the index time stamp using the local EntityManager: - DvObject dvObjectToModify = em.find(DvObject.class, docs.getDatasetId()); + DvObject dvObjectToModify = em.find(DvObject.class, id); 
dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime())); dvObjectToModify = em.merge(dvObjectToModify); - dvObjectToModify = null; - - return docs.getMessage(); } /** @@ -1630,7 +1694,11 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc sid.addField(fieldName, doc.getFieldValue(fieldName)); } - List paths = object.isInstanceofDataset() ? retrieveDVOPaths(datasetService.find(object.getId())) + Dataset dataset = null; + if (object.isInstanceofDataset()) { + dataset = datasetService.findDeep(object.getId()); + } + List paths = object.isInstanceofDataset() ? retrieveDVOPaths(dataset) : retrieveDVOPaths(dataverseService.find(object.getId())); sid.removeField(SearchFields.SUBTREE); @@ -1638,7 +1706,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc UpdateResponse addResponse = solrClientService.getSolrClient().add(sid); UpdateResponse commitResponse = solrClientService.getSolrClient().commit(); if (object.isInstanceofDataset()) { - for (DataFile df : datasetService.find(object.getId()).getFiles()) { + for (DataFile df : dataset.getFiles()) { solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString())); res = solrClientService.getSolrClient().query(solrQuery); if (!res.getResults().isEmpty()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index bf6dddd621a..45f7f396783 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -2,6 +2,7 @@ import com.ocpsoft.pretty.PrettyContext; import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; @@ -995,9 +996,29 @@ public boolean isAllowCustomTerms() { return settingsService.isTrueForKey(SettingsServiceBean.Key.AllowCustomTermsOfUse, safeDefaultIfKeyNotFound); } - public boolean isFilePIDsEnabled() { - boolean safeDefaultIfKeyNotFound = true; - return settingsService.isTrueForKey(SettingsServiceBean.Key.FilePIDsEnabled, safeDefaultIfKeyNotFound); + public boolean isFilePIDsEnabledForCollection(Dataverse collection) { + if (collection == null) { + return false; + } + + Dataverse thisCollection = collection; + + // If neither enabled nor disabled specifically for this collection, + // the parent collection setting is inherited (recursively): + while (thisCollection.getFilePIDsEnabled() == null) { + if (thisCollection.getOwner() == null) { + // We've reached the root collection, and file PID registration + // hasn't been explicitly configured there, therefore we presume that it is + // subject to how the registration is configured for the + // entire instance: + return settingsService.isTrueForKey(SettingsServiceBean.Key.FilePIDsEnabled, true); + } + thisCollection = thisCollection.getOwner(); + } + + // If present, the setting of the first direct ancestor collection + // takes precedence: + return thisCollection.getFilePIDsEnabled(); } public boolean isIndependentHandleService() { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java index 9dc7fe6415b..9c31371fe8d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/util/json/JSONLDUtil.java @@ -240,6 +240,7 @@ else if (key.equals("https://dataverse.org/schema/core#fileRequestAccess")) { } } dsv.setTermsOfUseAndAccess(terms); + terms.setDatasetVersion(dsv); dsv.setDatasetFields(dsfl); return dsv; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 4fe9654cc64..59290449988 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -153,6 +153,10 @@ public Dataverse parseDataverse(JsonObject jobj) throws JsonParseException { } } } + + if (jobj.containsKey("filePIDsEnabled")) { + dv.setFilePIDsEnabled(jobj.getBoolean("filePIDsEnabled")); + } /* We decided that subject is not user set, but gotten from the subject of the dataverse's datasets - leavig this code in for now, in case we need to go back to it at some point diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index e9e8fcd1a90..97640aa226d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -47,6 +47,7 @@ import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.DatasetFieldWalker; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @@ -305,6 +306,9 @@ public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail) { if(dv.getStorageDriverId() != null) { bld.add("storageDriverLabel", DataAccess.getStorageDriverLabelFor(dv.getStorageDriverId())); } + if (dv.getFilePIDsEnabled() != null) { + bld.add("filePIDsEnabled", dv.getFilePIDsEnabled()); + } return bld; } @@ -368,23 +372,28 @@ public static JsonObjectBuilder json(FileDetailsHolder ds) { } public static JsonObjectBuilder json(DatasetVersion dsv) { + return json(dsv, null); + } + + public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList) { + Dataset dataset = dsv.getDataset(); JsonObjectBuilder bld = jsonObjectBuilder() - .add("id", dsv.getId()).add("datasetId", dsv.getDataset().getId()) - .add("datasetPersistentId", dsv.getDataset().getGlobalId().asString()) - .add("storageIdentifier", dsv.getDataset().getStorageIdentifier()) + .add("id", dsv.getId()).add("datasetId", dataset.getId()) + .add("datasetPersistentId", dataset.getGlobalId().asString()) + .add("storageIdentifier", dataset.getStorageIdentifier()) .add("versionNumber", dsv.getVersionNumber()).add("versionMinorNumber", dsv.getMinorVersionNumber()) .add("versionState", dsv.getVersionState().name()).add("versionNote", dsv.getVersionNote()) .add("archiveNote", dsv.getArchiveNote()).add("deaccessionLink", dsv.getDeaccessionLink()) .add("distributionDate", dsv.getDistributionDate()).add("productionDate", dsv.getProductionDate()) .add("UNF", dsv.getUNF()).add("archiveTime", format(dsv.getArchiveTime())) .add("lastUpdateTime", format(dsv.getLastUpdateTime())).add("releaseTime", format(dsv.getReleaseTime())) - .add("createTime", format(dsv.getCreateTime())); - License license = DatasetUtil.getLicense(dsv);; + .add("createTime", format(dsv.getCreateTime())) + .add("alternativePersistentId", 
dataset.getAlternativePersistentIdentifier()) + .add("publicationDate", dataset.getPublicationDateFormattedYYYYMMDD()) + .add("citationDate", dataset.getCitationDateFormattedYYYYMMDD()); + License license = DatasetUtil.getLicense(dsv); if (license != null) { - // Standard license - bld.add("license", jsonObjectBuilder() - .add("name", DatasetUtil.getLicenseName(dsv)) - .add("uri", DatasetUtil.getLicenseURI(dsv))); + bld.add("license", jsonLicense(dsv)); } else { // Custom terms bld.add("termsOfUse", dsv.getTermsOfUseAndAccess().getTermsOfUse()) @@ -405,14 +414,15 @@ public static JsonObjectBuilder json(DatasetVersion dsv) { .add("studyCompletion", dsv.getTermsOfUseAndAccess().getStudyCompletion()) .add("fileAccessRequest", dsv.getTermsOfUseAndAccess().isFileAccessRequest()); - bld.add("metadataBlocks", jsonByBlocks(dsv.getDatasetFields())); - + bld.add("metadataBlocks", (anonymizedFieldTypeNamesList != null) ? + jsonByBlocks(dsv.getDatasetFields(), anonymizedFieldTypeNamesList) + : jsonByBlocks(dsv.getDatasetFields()) + ); bld.add("files", jsonFileMetadatas(dsv.getFileMetadatas())); return bld; } - - + public static JsonObjectBuilder jsonDataFileList(List dataFiles){ if (dataFiles==null){ @@ -485,11 +495,15 @@ public static JsonObjectBuilder json(DatasetDistributor dist) { } public static JsonObjectBuilder jsonByBlocks(List fields) { + return jsonByBlocks(fields, null); + } + + public static JsonObjectBuilder jsonByBlocks(List fields, List anonymizedFieldTypeNamesList) { JsonObjectBuilder blocksBld = jsonObjectBuilder(); for (Map.Entry> blockAndFields : DatasetField.groupByBlock(fields).entrySet()) { MetadataBlock block = blockAndFields.getKey(); - blocksBld.add(block.getName(), JsonPrinter.json(block, blockAndFields.getValue())); + blocksBld.add(block.getName(), JsonPrinter.json(block, blockAndFields.getValue(), anonymizedFieldTypeNamesList)); } return blocksBld; } @@ -503,6 +517,10 @@ public static JsonObjectBuilder jsonByBlocks(List fields) { * @return JSON Object builder with the block and fields information. */ public static JsonObjectBuilder json(MetadataBlock block, List fields) { + return json(block, fields, null); + } + + public static JsonObjectBuilder json(MetadataBlock block, List fields, List anonymizedFieldTypeNamesList) { JsonObjectBuilder blockBld = jsonObjectBuilder(); blockBld.add("displayName", block.getDisplayName()); @@ -510,7 +528,7 @@ public static JsonObjectBuilder json(MetadataBlock block, List fie final JsonArrayBuilder fieldsArray = Json.createArrayBuilder(); Map cvocMap = (datasetFieldService==null) ? 
new HashMap() :datasetFieldService.getCVocConf(true); - DatasetFieldWalker.walk(fields, settingsService, cvocMap, new DatasetFieldsToJson(fieldsArray)); + DatasetFieldWalker.walk(fields, settingsService, cvocMap, new DatasetFieldsToJson(fieldsArray, anonymizedFieldTypeNamesList)); blockBld.add("fields", fieldsArray); return blockBld; @@ -599,7 +617,7 @@ public static JsonObjectBuilder json(FileMetadata fmd) { .add("version", fmd.getVersion()) .add("datasetVersionId", fmd.getDatasetVersion().getId()) .add("categories", getFileCategories(fmd)) - .add("dataFile", JsonPrinter.json(fmd.getDataFile(), fmd)); + .add("dataFile", JsonPrinter.json(fmd.getDataFile(), fmd, false)); } public static JsonObjectBuilder json(AuxiliaryFile auxFile) { @@ -615,10 +633,10 @@ public static JsonObjectBuilder json(AuxiliaryFile auxFile) { .add("dataFile", JsonPrinter.json(auxFile.getDataFile())); } public static JsonObjectBuilder json(DataFile df) { - return JsonPrinter.json(df, null); + return JsonPrinter.json(df, null, false); } - public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { + public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider) { // File names are no longer stored in the DataFile entity; // (they are instead in the FileMetadata (as "labels") - this way // the filename can change between versions... @@ -643,7 +661,7 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { JsonObjectBuilder embargo = df.getEmbargo() != null ? JsonPrinter.json(df.getEmbargo()) : null; - return jsonObjectBuilder() + NullSafeJsonBuilder builder = jsonObjectBuilder() .add("id", df.getId()) .add("persistentId", pidString) .add("pidURL", pidURL) @@ -654,7 +672,6 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { .add("categories", getFileCategories(fileMetadata)) .add("embargo", embargo) //.add("released", df.isReleased()) - .add("restricted", df.isRestricted()) .add("storageIdentifier", df.getStorageIdentifier()) .add("originalFileFormat", df.getOriginalFileFormat()) .add("originalFormatLabel", df.getOriginalFormatLabel()) @@ -673,20 +690,24 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata) { //--------------------------------------------- .add("md5", getMd5IfItExists(df.getChecksumType(), df.getChecksumValue())) .add("checksum", getChecksumTypeAndValue(df.getChecksumType(), df.getChecksumValue())) - .add("fileMetadataId", fileMetadata.getId()) .add("tabularTags", getTabularFileTags(df)) - .add("creationDate", df.getCreateDateFormattedYYYYMMDD()) - .add("dataTables", df.getDataTables().isEmpty() ? null : JsonPrinter.jsonDT(df.getDataTables())) - .add("varGroups", fileMetadata.getVarGroups().isEmpty() - ? JsonPrinter.jsonVarGroup(fileMetadata.getVarGroups()) - : null); + .add("creationDate", df.getCreateDateFormattedYYYYMMDD()); + /* + * The restricted state was not included prior to #9175 so to avoid backward + * incompatability, it is now only added when generating json for the + * InternalExportDataProvider fileDetails. + */ + if (forExportDataProvider) { + builder.add("restricted", df.isRestricted()); + } + return builder; } //Started from https://github.com/RENCI-NRIG/dataverse/, i.e. 
https://github.com/RENCI-NRIG/dataverse/commit/2b5a1225b42cf1caba85e18abfeb952171c6754a public static JsonArrayBuilder jsonDT(List ldt) { JsonArrayBuilder ldtArr = Json.createArrayBuilder(); for(DataTable dt: ldt){ - ldtArr.add(jsonObjectBuilder().add("dataTable", JsonPrinter.json(dt))); + ldtArr.add(JsonPrinter.json(dt)); } return ldtArr; } @@ -714,6 +735,7 @@ public static JsonArrayBuilder jsonDV(List dvl) { // TODO: add sumstat and variable categories, check formats public static JsonObjectBuilder json(DataVariable dv) { return jsonObjectBuilder() + .add("id", dv.getId()) .add("name", dv.getName()) .add("label", dv.getLabel()) .add("weighted", dv.isWeighted()) @@ -729,9 +751,9 @@ public static JsonObjectBuilder json(DataVariable dv) { .add("recordSegmentNumber", dv.getRecordSegmentNumber()) .add("numberOfDecimalPoints",dv.getNumberOfDecimalPoints()) .add("variableMetadata",jsonVarMetadata(dv.getVariableMetadatas())) - .add("invalidRanges", dv.getInvalidRanges().isEmpty() ? JsonPrinter.jsonInvalidRanges(dv.getInvalidRanges()) : null) - .add("summaryStatistics", dv.getSummaryStatistics().isEmpty() ? JsonPrinter.jsonSumStat(dv.getSummaryStatistics()) : null) - .add("variableCategories", dv.getCategories().isEmpty() ? JsonPrinter.jsonCatStat(dv.getCategories()) : null) + .add("invalidRanges", dv.getInvalidRanges().isEmpty() ? null : JsonPrinter.jsonInvalidRanges(dv.getInvalidRanges())) + .add("summaryStatistics", dv.getSummaryStatistics().isEmpty() ? null : JsonPrinter.jsonSumStat(dv.getSummaryStatistics())) + .add("variableCategories", dv.getCategories().isEmpty() ? null : JsonPrinter.jsonCatStat(dv.getCategories())) ; } @@ -895,12 +917,16 @@ private static class DatasetFieldsToJson implements DatasetFieldWalker.Listener Deque objectStack = new LinkedList<>(); Deque valueArrStack = new LinkedList<>(); - JsonObjectBuilder result = null; - + List anonymizedFieldTypeNamesList = null; DatasetFieldsToJson(JsonArrayBuilder result) { valueArrStack.push(result); } + DatasetFieldsToJson(JsonArrayBuilder result, List anonymizedFieldTypeNamesList) { + this(result); + this.anonymizedFieldTypeNamesList = anonymizedFieldTypeNamesList; + } + @Override public void startField(DatasetField f) { objectStack.push(jsonObjectBuilder()); @@ -925,15 +951,19 @@ public void endField(DatasetField f) { JsonArray expandedValues = valueArrStack.pop().build(); JsonArray jsonValues = valueArrStack.pop().build(); if (!jsonValues.isEmpty()) { - jsonField.add("value", - f.getDatasetFieldType().isAllowMultiples() ? jsonValues - : jsonValues.get(0)); - if (!expandedValues.isEmpty()) { - jsonField.add("expandedvalue", - f.getDatasetFieldType().isAllowMultiples() ? expandedValues - : expandedValues.get(0)); + String datasetFieldName = f.getDatasetFieldType().getName(); + if (anonymizedFieldTypeNamesList != null && anonymizedFieldTypeNamesList.contains(datasetFieldName)) { + anonymizeField(jsonField); + } else { + jsonField.add("value", + f.getDatasetFieldType().isAllowMultiples() ? jsonValues + : jsonValues.get(0)); + if (!expandedValues.isEmpty()) { + jsonField.add("expandedvalue", + f.getDatasetFieldType().isAllowMultiples() ? 
expandedValues + : expandedValues.get(0)); + } } - valueArrStack.peek().add(jsonField); } } @@ -978,6 +1008,12 @@ public void endCompoundValue(DatasetFieldCompoundValue dsfcv) { valueArrStack.peek().add(jsonField); } } + + private void anonymizeField(JsonObjectBuilder jsonField) { + jsonField.add("typeClass", "primitive"); + jsonField.add("value", BundleUtil.getStringFromBundle("dataset.anonymized.withheld")); + jsonField.add("multiple", false); + } } public static JsonObjectBuilder json(AuthenticationProviderRow aRow) { @@ -1157,4 +1193,15 @@ public static JsonObjectBuilder jsonLinkset(Dataset ds) { .add("publicationDate", ds.getPublicationDateFormattedYYYYMMDD()) .add("storageIdentifier", ds.getStorageIdentifier()); } + + private static JsonObjectBuilder jsonLicense(DatasetVersion dsv) { + JsonObjectBuilder licenseJsonObjectBuilder = jsonObjectBuilder() + .add("name", DatasetUtil.getLicenseName(dsv)) + .add("uri", DatasetUtil.getLicenseURI(dsv)); + String licenseIconUri = DatasetUtil.getLicenseIcon(dsv); + if (licenseIconUri != null) { + licenseJsonObjectBuilder.add("iconUri", licenseIconUri); + } + return licenseJsonObjectBuilder; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java index d57b7072be7..cf78c4f8cdf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java @@ -394,7 +394,7 @@ private void workflowCompleted(Workflow wf, WorkflowContext ctxt) { String dataFilePIDFormat = settings.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); boolean registerGlobalIdsForFiles = (currentGlobalIdProtocol.equals(ctxt.getDataset().getProtocol()) || dataFilePIDFormat.equals("INDEPENDENT")) - && systemConfig.isFilePIDsEnabled(); + && systemConfig.isFilePIDsEnabledForCollection(ctxt.getDataset().getOwner()); if ( registerGlobalIdsForFiles ){ registerGlobalIdsForFiles = currentGlobalAuthority.equals( ctxt.getDataset().getAuthority() ); } diff --git a/src/main/resources/db/migration/V5.13.0.2__8889-filepids-in-collections.sql b/src/main/resources/db/migration/V5.13.0.2__8889-filepids-in-collections.sql new file mode 100644 index 00000000000..5e6ce945fe2 --- /dev/null +++ b/src/main/resources/db/migration/V5.13.0.2__8889-filepids-in-collections.sql @@ -0,0 +1 @@ +ALTER TABLE dataverse ADD COLUMN IF NOT EXISTS filePIDsEnabled bool; diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index a4e635b8c14..1a049331ae4 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -11,9 +11,20 @@ xmlns:o="http://omnifaces.org/ui" xmlns:iqbs="http://xmlns.jcp.org/jsf/composite/iqbs"> - - - + + + + + + + + + + + + + +
    @@ -109,7 +120,7 @@ $(document).ready(function () { uploadWidgetDropMsg(); - setupDirectUpload(#{systemConfig.directUploadEnabled(EditDatafilesPage.dataset)}); + #{useDirectUpload ? 'setupDirectUpload(true);':''} }); //]]> @@ -584,7 +595,7 @@

    #{EditDatafilesPage.warningMessageForFileTypeDifferentPopUp}