diff --git a/.env b/.env
index e3ececc2e54..d5cffcec0aa 100644
--- a/.env
+++ b/.env
@@ -1,4 +1,5 @@
 APP_IMAGE=gdcc/dataverse:unstable
-POSTGRES_VERSION=13
+POSTGRES_VERSION=16
 DATAVERSE_DB_USER=dataverse
 SOLR_VERSION=9.3.0
+SKIP_DEPLOY=0
\ No newline at end of file
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000000..9860024f70a
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,4 @@
+# https://www.git-scm.com/docs/gitattributes
+
+# This sets mandatory LF line endings for .sh files, preventing Windows users from having to change their git config --global core.autocrlf to 'false' or 'input'
+*.sh text eol=lf
\ No newline at end of file
diff --git a/.github/workflows/maven_cache_management.yml b/.github/workflows/maven_cache_management.yml
new file mode 100644
index 00000000000..fedf63b7c54
--- /dev/null
+++ b/.github/workflows/maven_cache_management.yml
@@ -0,0 +1,101 @@
+name: Maven Cache Management
+
+on:
+  # Every push to develop should trigger cache rejuvenation (dependencies might have changed)
+  push:
+    branches:
+      - develop
+  # According to https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#usage-limits-and-eviction-policy
+  # all caches are deleted after 7 days of no access. Make sure we rejuvenate every 7 days to keep them available.
+  schedule:
+    - cron: '23 2 * * 0' # Run for 'develop' every Sunday at 02:23 UTC (3:23 CET, 21:23 ET)
+  # Enable manual cache management
+  workflow_dispatch:
+  # Delete branch caches once a PR is merged
+  pull_request:
+    types:
+      - closed
+
+env:
+  COMMON_CACHE_KEY: "dataverse-maven-cache"
+  COMMON_CACHE_PATH: "~/.m2/repository"
+
+jobs:
+  seed:
+    name: Drop and Re-Seed Local Repository
+    runs-on: ubuntu-latest
+    if: ${{ github.event_name != 'pull_request' }}
+    permissions:
+      # Write permission needed to delete caches
+      # See also: https://docs.github.com/en/rest/actions/cache?apiVersion=2022-11-28#delete-a-github-actions-cache-for-a-repository-using-a-cache-id
+      actions: write
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Determine Java version from Parent POM
+        run: echo "JAVA_VERSION=$(grep '<target.java.version>' modules/dataverse-parent/pom.xml | cut -f2 -d'>' | cut -f1 -d'<')" >> ${GITHUB_ENV}
+      - name: Set up JDK ${{ env.JAVA_VERSION }}
+        uses: actions/setup-java@v4
+        with:
+          java-version: ${{ env.JAVA_VERSION }}
+          distribution: temurin
+      - name: Seed common cache
+        run: |
+          mvn -B -f modules/dataverse-parent dependency:go-offline dependency:resolve-plugins
+      # This non-obvious order is deliberate: the download via Maven above takes a very long time (7-8 min).
+      # Jobs should not be left without a cache. Deleting and saving in one go leaves only a small chance for a cache miss.
+      - name: Drop common cache
+        run: |
+          gh extension install actions/gh-actions-cache
+          echo "🛒 Fetching list of cache keys"
+          cacheKeys=$(gh actions-cache list -R ${{ github.repository }} -B develop | cut -f 1 )
+
+          ## Setting this to not fail the workflow while deleting cache keys.
+          set +e
+          echo "🗑️ Deleting caches..."
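+          # Each cache key is deleted one by one below; with `set +e` above,
+          # a single failed delete does not abort the remaining deletions.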
+          for cacheKey in $cacheKeys
+          do
+            gh actions-cache delete $cacheKey -R ${{ github.repository }} -B develop --confirm
+          done
+          echo "✅ Done"
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Save the common cache
+        uses: actions/cache@v4
+        with:
+          path: ${{ env.COMMON_CACHE_PATH }}
+          key: ${{ env.COMMON_CACHE_KEY }}
+          enableCrossOsArchive: true
+
+  # Let's delete feature branch caches once their PR is merged - we only have 10 GB of space before eviction kicks in
+  deplete:
+    name: Deplete feature branch caches
+    runs-on: ubuntu-latest
+    if: ${{ github.event_name == 'pull_request' }}
+    permissions:
+      # `actions:write` permission is required to delete caches
+      # See also: https://docs.github.com/en/rest/actions/cache?apiVersion=2022-11-28#delete-a-github-actions-cache-for-a-repository-using-a-cache-id
+      actions: write
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Cleanup caches
+        run: |
+          gh extension install actions/gh-actions-cache
+
+          BRANCH=refs/pull/${{ github.event.pull_request.number }}/merge
+          echo "🛒 Fetching list of cache keys"
+          cacheKeysForPR=$(gh actions-cache list -R ${{ github.repository }} -B $BRANCH | cut -f 1 )
+
+          ## Setting this to not fail the workflow while deleting cache keys.
+          set +e
+          echo "🗑️ Deleting caches..."
+          for cacheKey in $cacheKeysForPR
+          do
+            gh actions-cache delete $cacheKey -R ${{ github.repository }} -B $BRANCH --confirm
+          done
+          echo "✅ Done"
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/maven_unit_test.yml b/.github/workflows/maven_unit_test.yml
index a70c55fc31d..4ad4798bc64 100644
--- a/.github/workflows/maven_unit_test.yml
+++ b/.github/workflows/maven_unit_test.yml
@@ -4,6 +4,7 @@ on:
   push:
     paths:
       - "**.java"
+      - "**.sql"
      - "pom.xml"
      - "modules/**/pom.xml"
      - "!modules/container-base/**"
@@ -11,6 +12,7 @@ on:
  pull_request:
    paths:
      - "**.java"
+      - "**.sql"
      - "pom.xml"
      - "modules/**/pom.xml"
      - "!modules/container-base/**"
diff --git a/.gitignore b/.gitignore
index 7f0d3a2b466..514f82116de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,7 @@ oauth-credentials.md
/src/main/webapp/oauth2/newAccount.html
scripts/api/setup-all.sh*
scripts/api/setup-all.*.log
+src/main/resources/edu/harvard/iq/dataverse/openapi/
# ctags generated tag file
tags
@@ -61,3 +62,4 @@ src/main/webapp/resources/images/dataverseproject.png.thumb140
# Docker development volumes
/docker-dev-volumes
+/.vs
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b2be8f531c4..4fa6e955b70 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,67 +1,7 @@
# Contributing to Dataverse
-Thank you for your interest in contributing to Dataverse! We are open to contributions from everyone. You don't need permission to participate. Just jump in. If you have questions, please reach out using one or more of the channels described below.
+Thank you for your interest in contributing to Dataverse! We are open to contributions from everyone.
-We aren't just looking for developers. There are many ways to contribute to Dataverse. We welcome contributions of ideas, bug reports, usability research/feedback, documentation, code, and more!
+Please see our [Contributor Guide][] for how you can help!
-
-## Ideas/Feature Requests
-
-Your idea or feature request might already be captured in the Dataverse [issue tracker] on GitHub but if not, the best way to bring it to the community's attention is by posting on the [dataverse-community Google Group][] or bringing it up on a [Community Call][].
You're also welcome to make some noise in [chat.dataverse.org][] or cram your idea into 280 characters and mention [@dataverseorg][] on Twitter. To discuss your idea privately, please email it to support@dataverse.org
-
-There's a chance your idea is already on our roadmap, which is available at https://www.iq.harvard.edu/roadmap-dataverse-project
-
-[chat.dataverse.org]: http://chat.dataverse.org
-[issue tracker]: https://github.com/IQSS/dataverse/issues
-[@dataverseorg]: https://twitter.com/dataverseorg
-
-## Usability testing
-
-Please email us at support@dataverse.org if you are interested in participating in usability testing.
-
-## Bug Reports/Issues
-
-An issue is a bug (a feature is no longer behaving the way it should) or a feature (something new to Dataverse that helps users complete tasks). You can browse the Dataverse [issue tracker] on GitHub by open or closed issues or by milestones.
-
-Before submitting an issue, please search the existing issues by using the search bar at the top of the page. If there is an existing open issue that matches the issue you want to report, please add a comment to it.
-
-If there is no pre-existing issue or it has been closed, please click on the "New Issue" button, log in, and write in what the issue is (unless it is a security issue which should be reported privately to security@dataverse.org).
-
-If you do not receive a reply to your new issue or comment in a timely manner, please email support@dataverse.org with a link to the issue.
-
-### Writing an Issue
-
-For the subject of an issue, please start it by writing the feature or functionality it relates to, i.e. "Create Account:..." or "Dataset Page:...". In the body of the issue, please outline the issue you are reporting with as much detail as possible. In order for the Dataverse development team to best respond to the issue, we need as much information about the issue as you can provide. Include steps to reproduce bugs. Indicate which version you're using, which is shown at the bottom of the page. We love screenshots!
-
-### Issue Attachments
-
-You can attach certain files (images, screenshots, logs, etc.) by dragging and dropping, selecting them, or pasting from the clipboard. Files must be one of GitHub's [supported attachment formats] such as png, gif, jpg, txt, pdf, zip, etc. (Pro tip: A file ending in .log can be renamed to .txt so you can upload it.) If there's no easy way to attach your file, please include a URL that points to the file in question.
-
-[supported attachment formats]: https://help.github.com/articles/file-attachments-on-issues-and-pull-requests/
-
-## Documentation
-
-The source for the documentation at http://guides.dataverse.org/en/latest/ is in the GitHub repo under the "[doc][]" folder. If you find a typo or inaccuracy or something to clarify, please send us a pull request! For more on the tools used to write docs, please see the [documentation][] section of the Developer Guide.
-
-[doc]: https://github.com/IQSS/dataverse/tree/develop/doc/sphinx-guides/source
-[documentation]: http://guides.dataverse.org/en/latest/developers/documentation.html
-
-## Code/Pull Requests
-
-We love code contributions. Developers are not limited to the main Dataverse code in this git repo. You can help with API client libraries in your favorite language that are mentioned in the [API Guide][] or create a new library. You can help work on configuration management code that's mentioned in the [Installation Guide][].
The Installation Guide also covers a relatively new concept called "external tools" that allows developers to create their own tools that are available from within an installation of Dataverse.
-
-[API Guide]: http://guides.dataverse.org/en/latest/api
-[Installation Guide]: http://guides.dataverse.org/en/latest/installation
-
-If you are interested in working on the main Dataverse code, great! Before you start coding, please reach out to us either on the [dataverse-community Google Group][], the [dataverse-dev Google Group][], [chat.dataverse.org][], or via support@dataverse.org to make sure the effort is well coordinated and we avoid merge conflicts. We maintain a list of [community contributors][] and [dev efforts][] the community is working on so please let us know if you'd like to be added or removed from either list.
-
-Please read http://guides.dataverse.org/en/latest/developers/version-control.html to understand how we use the "git flow" model of development and how we will encourage you to create a GitHub issue (if it doesn't exist already) to associate with your pull request. That page also includes tips on making a pull request.
-
-After making your pull request, your goal should be to help it advance through our kanban board at https://github.com/orgs/IQSS/projects/2 . If no one has moved your pull request to the code review column in a timely manner, please reach out. Note that once a pull request is created for an issue, we'll remove the issue from the board so that we only track one card (the pull request).
-
-Thanks for your contribution!
-
-[dataverse-community Google Group]: https://groups.google.com/group/dataverse-community
-[Community Call]: https://dataverse.org/community-calls
-[dataverse-dev Google Group]: https://groups.google.com/group/dataverse-dev
-[community contributors]: https://docs.google.com/spreadsheets/d/1o9DD-MQ0WkrYaEFTD5rF_NtyL8aUISgURsAXSL7Budk/edit?usp=sharing
-[dev efforts]: https://github.com/orgs/IQSS/projects/2#column-5298405
+[Contributor Guide]: https://guides.dataverse.org/en/latest/contributor/index.html
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index b0864a0c55f..00000000000
--- a/Dockerfile
+++ /dev/null
@@ -1 +0,0 @@
-# See http://guides.dataverse.org/en/latest/developers/containers.html
diff --git a/README.md b/README.md
index 831dbfed5ff..77720453d5f 100644
--- a/README.md
+++ b/README.md
@@ -3,11 +3,11 @@ Dataverse®
Dataverse is an [open source][] software platform for sharing, finding, citing, and preserving research data (developed by the [Dataverse team](https://dataverse.org/about) at the [Institute for Quantitative Social Science](https://iq.harvard.edu/) and the [Dataverse community][]).
-[dataverse.org][] is our home on the web and shows a map of Dataverse installations around the world, a list of [features][], [integrations][] that have been made possible through [REST APIs][], our development [roadmap][], and more.
+[dataverse.org][] is our home on the web and shows a map of Dataverse installations around the world, a list of [features][], [integrations][] that have been made possible through [REST APIs][], our [project board][], our development [roadmap][], and more.
We maintain a demo site at [demo.dataverse.org][] which you are welcome to use for testing and evaluating Dataverse.
-To install Dataverse, please see our [Installation Guide][] which will prompt you to download our [latest release][].
+To install Dataverse, please see our [Installation Guide][] which will prompt you to download our [latest release][]. Docker users should consult the [Container Guide][].
To discuss Dataverse with the community, please join our [mailing list][], participate in a [community call][], chat with us at [chat.dataverse.org][], or attend our annual [Dataverse Community Meeting][].
@@ -28,7 +28,9 @@ Dataverse is a trademark of President and Fellows of Harvard College and is regi
[Dataverse community]: https://dataverse.org/developers
[Installation Guide]: https://guides.dataverse.org/en/latest/installation/index.html
[latest release]: https://github.com/IQSS/dataverse/releases
+[Container Guide]: https://guides.dataverse.org/en/latest/container/index.html
[features]: https://dataverse.org/software-features
+[project board]: https://github.com/orgs/IQSS/projects/34
[roadmap]: https://www.iq.harvard.edu/roadmap-dataverse-project
[integrations]: https://dataverse.org/integrations
[REST APIs]: https://guides.dataverse.org/en/latest/api/index.html
diff --git a/conf/localstack/buckets.sh b/conf/localstack/buckets.sh
new file mode 100755
index 00000000000..fe940d9890d
--- /dev/null
+++ b/conf/localstack/buckets.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# https://stackoverflow.com/questions/53619901/auto-create-s3-buckets-on-localstack
+awslocal s3 mb s3://mybucket
diff --git a/conf/proxy/Caddyfile b/conf/proxy/Caddyfile
new file mode 100644
index 00000000000..70e6904d26e
--- /dev/null
+++ b/conf/proxy/Caddyfile
@@ -0,0 +1,12 @@
+# This configuration is intended to be used with Caddy, a very small, high-performance proxy.
+# It serves the application container's Payara Admin GUI via HTTP instead of HTTPS,
+# avoiding the trouble of self-signed certificates for local development.
+
+:4848 {
+    reverse_proxy https://dataverse:4848 {
+        transport http {
+            tls_insecure_skip_verify
+        }
+        header_down Location "^https://" "http://"
+    }
+}
diff --git a/conf/solr/9.3.0/schema.xml b/conf/solr/schema.xml
similarity index 99%
rename from conf/solr/9.3.0/schema.xml
rename to conf/solr/schema.xml
index 3711ffeddba..5dde750573d 100644
--- a/conf/solr/9.3.0/schema.xml
+++ b/conf/solr/schema.xml
@@ -157,7 +157,8 @@ - + + @@ -229,6 +230,8 @@ + + - ${solr.autoCommit.maxTime:15000} + ${solr.autoCommit.maxTime:30000} false @@ -301,7 +301,7 @@ --> - ${solr.autoSoftCommit.maxTime:-1} + ${solr.autoSoftCommit.maxTime:1000} + + + - conf/solr/9.3.0 + conf/solr solr
diff --git a/modules/container-configbaker/scripts/bootstrap/demo/init.sh b/modules/container-configbaker/scripts/bootstrap/demo/init.sh
new file mode 100644
index 00000000000..e8d1d07dd2d
--- /dev/null
+++ b/modules/container-configbaker/scripts/bootstrap/demo/init.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# Set some defaults
+DATAVERSE_URL=${DATAVERSE_URL:-"http://dataverse:8080"}
+export DATAVERSE_URL
+
+BLOCKED_API_KEY=${BLOCKED_API_KEY:-"unblockme"}
+export BLOCKED_API_KEY
+
+# --insecure is used so we can configure a few things but
+# later in this script we'll apply the changes as if we had
+# run the script without --insecure.
+echo "Running base setup-all.sh..."
+"${BOOTSTRAP_DIR}"/base/setup-all.sh --insecure -p=admin1 | tee /tmp/setup-all.sh.out
+
+echo ""
+echo "Setting DOI provider to \"FAKE\"..."
+curl -sS -X PUT -d FAKE "${DATAVERSE_URL}/api/admin/settings/:DoiProvider"
+
+echo ""
+echo "Revoke the key that allows for creation of builtin users..."
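+# Note (added for clarity; not in the upstream script): deleting BuiltinUsers.KEY
+# removes the key that the builtin-users API requires, so the endpoint for
+# creating builtin accounts can no longer be used.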
+curl -sS -X DELETE "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY"
+
+echo ""
+echo "Set key for accessing blocked API endpoints..."
+curl -sS -X PUT -d "$BLOCKED_API_KEY" "${DATAVERSE_URL}/api/admin/settings/:BlockedApiKey"
+
+echo ""
+echo "Set policy to only allow access to admin APIs with a key..."
+curl -sS -X PUT -d unblock-key "${DATAVERSE_URL}/api/admin/settings/:BlockedApiPolicy"
+
+echo ""
+echo "Block admin and other sensitive API endpoints..."
+curl -sS -X PUT -d 'admin,builtin-users' "${DATAVERSE_URL}/api/admin/settings/:BlockedApiEndpoints"
+
+echo ""
+echo "Done, your instance has been configured for demo or eval. Have a nice day!"
diff --git a/modules/container-configbaker/scripts/bootstrap/dev/init.sh b/modules/container-configbaker/scripts/bootstrap/dev/init.sh
index efdaee3d0c3..f8770436652 100644
--- a/modules/container-configbaker/scripts/bootstrap/dev/init.sh
+++ b/modules/container-configbaker/scripts/bootstrap/dev/init.sh
@@ -9,9 +9,6 @@ export DATAVERSE_URL
echo "Running base setup-all.sh (INSECURE MODE)..."
"${BOOTSTRAP_DIR}"/base/setup-all.sh --insecure -p=admin1 | tee /tmp/setup-all.sh.out
-echo "Setting system mail address..."
-curl -X PUT -d "dataverse@localhost" "${DATAVERSE_URL}/api/admin/settings/:SystemEmail"
-
echo "Setting DOI provider to \"FAKE\"..."
curl "${DATAVERSE_URL}/api/admin/settings/:DoiProvider" -X PUT -d FAKE
diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml
index db0fa46a952..62efbf62317 100644
@@ -40,8 +40,8 @@ com.google.cloud - google-cloud-bom - ${google.cloud.version} + libraries-bom + ${google.library.version} pom import @@ -131,7 +131,7 @@ - 6.0 + 6.3 17 UTF-8 @@ -148,19 +148,18 @@ -Duser.timezone=${project.timezone} -Dfile.encoding=${project.build.sourceEncoding} -Duser.language=${project.language} -Duser.region=${project.region} - 6.2023.8 - 42.6.0 - 9.3.0 - 1.12.290 - 0.177.0 + 6.2024.6 + 42.7.2 + 9.4.1 + 1.12.748 + 26.30.0 - 8.0.0 1.7.35 - 2.11.0 + 2.15.1 1.2 3.12.0 - 1.21 + 1.26.0 4.5.13 4.4.14 @@ -168,11 +167,11 @@ 5.2.0 - 1.19.0 - 2.10.1 - 5.10.0 - 5.4.0 - 0.8.10 + 1.19.7 + 3.7.1 + 5.10.2 + 5.11.0 + 0.8.11 9.3 @@ -182,8 +181,8 @@ 3.3.2 3.5.0 3.1.1 - 3.1.0 - 3.1.0 + 3.2.5 + 3.2.5 3.6.0 3.3.1 3.0.0-M7 @@ -386,18 +385,6 @@ false - - - payara-patched-externals - Payara Patched Externals - https://raw.githubusercontent.com/payara/Payara_PatchedProjects/master - - true - - - false - - central-repo Central Repository
diff --git a/pom.xml b/pom.xml
index 5536bcccb05..76a8f61444f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -27,11 +27,17 @@ war 1.2.18.4 - 9.21.2 + 9.22.1 1.20.1 5.2.1 2.4.1 5.5.3 + + Dataverse API + ${project.version} + Open source research data repository software.
+ + ${project.build.outputDirectory}/META-INF - + - ${payara.version} @@ -210,6 +234,18 @@ provided + + + jakarta.json.bind + jakarta.json.bind-api + + + + org.eclipse + yasson + test + + org.glassfish @@ -283,7 +319,7 @@ org.apache.solr solr-solrj - 9.3.0 + 9.4.1 colt @@ -466,7 +502,7 @@ org.duracloud common - 7.1.1 + 8.0.0 org.slf4j @@ -481,7 +517,7 @@ org.duracloud storeclient - 7.1.1 + 8.0.0 org.slf4j @@ -514,6 +550,11 @@ opennlp-tools 1.9.1 + + org.xmlunit + xmlunit-core + 2.9.1 + com.google.cloud google-cloud-storage @@ -532,6 +573,12 @@ java-json-canonicalization 1.1 + + + io.gdcc + sitemapgen4j + 2.1.2 + edu.ucar cdm-core @@ -542,6 +589,10 @@ dataverse-spi 2.0.0 + + javax.cache + cache-api + org.junit.jupiter @@ -612,6 +663,11 @@ 3.0.0 test + + org.testcontainers + localstack + test + + process-classes + + ${openapi.outputDirectory} + openapi + ${openapi.infoTitle} + ${openapi.infoVersion} + ${openapi.infoDescription} + CLASS_METHOD + edu.harvard.iq.dataverse + true + + + + @@ -906,10 +993,11 @@ true docker-build - 13 + 16 gdcc/dataverse:${app.image.tag} unstable + false gdcc/base:${base.image.tag} unstable gdcc/configbaker:${conf.image.tag} @@ -922,6 +1010,7 @@ ${postgresql.server.version} ${solr.version} dataverse + ${app.skipDeploy} @@ -1035,4 +1124,4 @@ - + \ No newline at end of file diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json b/scripts/api/data/dataset-create-new-all-default-fields.json index 1118ed98a03..cc856c6372f 100644 --- a/scripts/api/data/dataset-create-new-all-default-fields.json +++ b/scripts/api/data/dataset-create-new-all-default-fields.json @@ -231,6 +231,12 @@ "typeClass": "primitive", "value": "KeywordTerm1" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -251,6 +257,12 @@ "typeClass": "primitive", "value": "KeywordTerm2" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI2.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -907,14 +919,14 @@ "typeClass": "primitive", "value": "-70" }, - "northLongitude": { - "typeName": "northLongitude", + "northLatitude": { + "typeName": "northLatitude", "multiple": false, "typeClass": "primitive", "value": "43" }, - "southLongitude": { - "typeName": "southLongitude", + "southLatitude": { + "typeName": "southLatitude", "multiple": false, "typeClass": "primitive", "value": "42" @@ -933,14 +945,14 @@ "typeClass": "primitive", "value": "-13" }, - "northLongitude": { - "typeName": "northLongitude", + "northLatitude": { + "typeName": "northLatitude", "multiple": false, "typeClass": "primitive", "value": "29" }, - "southLongitude": { - "typeName": "southLongitude", + "southLatitude": { + "typeName": "southLatitude", "multiple": false, "typeClass": "primitive", "value": "28" diff --git a/scripts/api/data/licenses/licenseApache-2.0.json b/scripts/api/data/licenses/licenseApache-2.0.json new file mode 100644 index 00000000000..5b7c3cf5c95 --- /dev/null +++ b/scripts/api/data/licenses/licenseApache-2.0.json @@ -0,0 +1,8 @@ +{ + "name": "Apache-2.0", + "uri": "http://www.apache.org/licenses/LICENSE-2.0", + "shortDescription": "Apache License 2.0", + "active": true, + "sortOrder": 9 + } + \ No newline at end of file diff --git a/scripts/api/data/licenses/licenseMIT.json 
b/scripts/api/data/licenses/licenseMIT.json new file mode 100644 index 00000000000..a879e8a5595 --- /dev/null +++ b/scripts/api/data/licenses/licenseMIT.json @@ -0,0 +1,7 @@ +{ + "name": "MIT", + "uri": "https://opensource.org/licenses/MIT", + "shortDescription": "MIT License", + "active": true, + "sortOrder": 8 +} diff --git a/scripts/api/data/metadatablocks/astrophysics.tsv b/scripts/api/data/metadatablocks/astrophysics.tsv index 4039d32cb75..92792d404c9 100644 --- a/scripts/api/data/metadatablocks/astrophysics.tsv +++ b/scripts/api/data/metadatablocks/astrophysics.tsv @@ -2,13 +2,13 @@ astrophysics Astronomy and Astrophysics Metadata #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id astroType Type The nature or genre of the content of the files in the dataset. text 0 TRUE TRUE TRUE TRUE FALSE FALSE astrophysics - astroFacility Facility The observatory or facility where the data was obtained. text 1 TRUE TRUE TRUE TRUE FALSE FALSE astrophysics - astroInstrument Instrument The instrument used to collect the data. text 2 TRUE TRUE TRUE TRUE FALSE FALSE astrophysics + astroFacility Facility The observatory or facility where the data was obtained. text 1 TRUE FALSE TRUE TRUE FALSE FALSE astrophysics + astroInstrument Instrument The instrument used to collect the data. text 2 TRUE FALSE TRUE TRUE FALSE FALSE astrophysics astroObject Object Astronomical Objects represented in the data (Given as SIMBAD recognizable names preferred). text 3 TRUE FALSE TRUE TRUE FALSE FALSE astrophysics resolution.Spatial Spatial Resolution The spatial (angular) resolution that is typical of the observations, in decimal degrees. text 4 TRUE FALSE FALSE TRUE FALSE FALSE astrophysics resolution.Spectral Spectral Resolution The spectral resolution that is typical of the observations, given as the ratio \u03bb/\u0394\u03bb. text 5 TRUE FALSE FALSE TRUE FALSE FALSE astrophysics resolution.Temporal Time Resolution The temporal resolution that is typical of the observations, given in seconds. text 6 FALSE FALSE FALSE FALSE FALSE FALSE astrophysics - coverage.Spectral.Bandpass Bandpass Conventional bandpass name text 7 TRUE TRUE TRUE TRUE FALSE FALSE astrophysics + coverage.Spectral.Bandpass Bandpass Conventional bandpass name text 7 TRUE FALSE TRUE TRUE FALSE FALSE astrophysics coverage.Spectral.CentralWavelength Central Wavelength (m) The central wavelength of the spectral bandpass, in meters. Enter a floating-point number. float 8 TRUE FALSE TRUE TRUE FALSE FALSE astrophysics coverage.Spectral.Wavelength Wavelength Range The minimum and maximum wavelength of the spectral bandpass. Enter a floating-point number. none 9 FALSE FALSE TRUE FALSE FALSE FALSE astrophysics coverage.Spectral.MinimumWavelength Minimum (m) The minimum wavelength of the spectral bandpass, in meters. Enter a floating-point number. float 10 TRUE FALSE FALSE TRUE FALSE FALSE coverage.Spectral.Wavelength astrophysics diff --git a/scripts/api/data/metadatablocks/biomedical.tsv b/scripts/api/data/metadatablocks/biomedical.tsv index 28d59130c34..06f1ebec1b4 100644 --- a/scripts/api/data/metadatablocks/biomedical.tsv +++ b/scripts/api/data/metadatablocks/biomedical.tsv @@ -13,7 +13,7 @@ studyAssayOtherTechnologyType Other Technology Type If Other was selected in Technology Type, list any other technology types that were used in this Dataset. 
text 9 TRUE FALSE TRUE TRUE FALSE FALSE biomedical studyAssayPlatform Technology Platform The manufacturer and name of the technology platform used in the assay (e.g. Bruker AVANCE). text 10 TRUE TRUE TRUE TRUE FALSE FALSE biomedical studyAssayOtherPlatform Other Technology Platform If Other was selected in Technology Platform, list any other technology platforms that were used in this Dataset. text 11 TRUE FALSE TRUE TRUE FALSE FALSE biomedical - studyAssayCellType Cell Type The name of the cell line from which the source or sample derives. text 12 TRUE TRUE TRUE TRUE FALSE FALSE biomedical + studyAssayCellType Cell Type The name of the cell line from which the source or sample derives. text 12 TRUE FALSE TRUE TRUE FALSE FALSE biomedical #controlledVocabulary DatasetField Value identifier displayOrder studyDesignType Case Control EFO_0001427 0 studyDesignType Cross Sectional EFO_0001428 1 @@ -45,6 +45,7 @@ studyFactorType Treatment Compound EFO_0000369 17 studyFactorType Treatment Type EFO_0000727 18 studyFactorType Other OTHER_FACTOR 19 + studyAssayMeasurementType cell counting ERO_0001899 0 studyAssayMeasurementType cell sorting CHMO_0001085 1 studyAssayMeasurementType clinical chemistry analysis OBI_0000520 2 studyAssayMeasurementType copy number variation profiling OBI_0000537 3 diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index b21b6bcce57..18354f2b1f7 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -23,62 +23,63 @@ subject Subject The area of study relevant to the Dataset text 19 TRUE TRUE TRUE TRUE TRUE TRUE citation http://purl.org/dc/terms/subject keyword Keyword A key term that describes an important aspect of the Dataset and information about any controlled vocabulary used none 20 FALSE FALSE TRUE FALSE TRUE FALSE citation keywordValue Term A key term that describes important aspects of the Dataset text 21 #VALUE TRUE FALSE FALSE TRUE TRUE FALSE keyword citation - keywordVocabulary Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 22 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE keyword citation - keywordVocabularyURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 23 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE keyword citation - topicClassification Topic Classification Indicates a broad, important topic or subject that the Dataset covers and information about any controlled vocabulary used none 24 FALSE FALSE TRUE FALSE FALSE FALSE citation - topicClassValue Term A topic or subject term text 25 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE topicClassification citation - topicClassVocab Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 26 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation - topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 27 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation - publication Related Publication The article or report that uses the data in the Dataset. 
The full list of related publications will be displayed on the metadata tab none 28 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy - publicationCitation Citation The full bibliographic citation for the related publication textbox 29 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation - publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 30 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme - publicationIDNumber Identifier The identifier for a related publication text 31 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier - publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. a journal article webpage https:// url 32 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE publication citation https://schema.org/distribution - notesText Notes Additional information about the Dataset textbox 33 FALSE FALSE FALSE FALSE TRUE FALSE citation - language Language A language that the Dataset's files is written in text 34 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language - producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 35 FALSE FALSE TRUE FALSE FALSE FALSE citation - producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 36 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation - producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 37 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerURL URL The URL of the producer's website https:// url 39 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerLogoURL Logo URL The URL of the producer's logo https:// url 40
FALSE FALSE FALSE FALSE FALSE FALSE producer citation - productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 41 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Location The location where the data and any related materials were produced or collected text 42 TRUE FALSE TRUE TRUE FALSE FALSE citation - contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 43 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor - contributorType Type Indicates the type of contribution made to the dataset text 44 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation - contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 45 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation - grantNumber Funding Information Information about the Dataset's financial support none 46 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor - grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 47 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 48 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 49 FALSE FALSE TRUE FALSE FALSE FALSE citation - distributorName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 50 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation - distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 51 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorURL URL The URL of the distributor's webpage https:// url 53 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 54
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 55 TRUE FALSE FALSE TRUE FALSE FALSE citation - depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 56 FALSE FALSE FALSE FALSE FALSE FALSE citation - dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 57 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted - timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 58 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage - timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 59 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 60 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - dateOfCollection Date of Collection The dates when the data were collected or generated none 61 ; FALSE FALSE TRUE FALSE FALSE FALSE citation - dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 62 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - kindOfData Data Type The type of data included in the files (e.g. survey data, clinical data, or machine-readable text) text 64 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData - series Series Information about the dataset series to which the Dataset belong none 65 : FALSE FALSE TRUE FALSE FALSE FALSE citation - seriesName Name The name of the dataset series text 66 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation - seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 67 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation - software Software Information about the software used to generate the Dataset none 68 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy - softwareName Name The name of software used to generate the Dataset text 69 #VALUE FALSE TRUE FALSE FALSE FALSE FALSE software citation - softwareVersion Version The version of the software used to generate the Dataset, e.g. 
4.11 text 70 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 71 FALSE FALSE TRUE FALSE FALSE FALSE citation - relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 72 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation - otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 73 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references - dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. a book, article, serial, or machine-readable data file) textbox 74 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom - originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 75 FALSE FALSE FALSE FALSE FALSE FALSE citation - characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation - accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation + keywordTermURI Term URI A URI that points to the web presence of the Keyword Term https:// url 22 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE keyword citation + keywordVocabulary Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 23 (#VALUE) FALSE FALSE FALSE FALSE TRUE FALSE keyword citation + keywordVocabularyURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 24 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE keyword citation + topicClassification Topic Classification Indicates a broad, important topic or subject that the Dataset covers and information about any controlled vocabulary used none 25 FALSE FALSE TRUE FALSE FALSE FALSE citation + topicClassValue Term A topic or subject term text 26 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE topicClassification citation + topicClassVocab Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 27 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation + topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 28 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation + publication Related Publication The article or report that uses the data in the Dataset. 
The full list of related publications will be displayed on the metadata tab none 29 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy + publicationCitation Citation The full bibliographic citation for the related publication textbox 30 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation + publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 31 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme + publicationIDNumber Identifier The identifier for a related publication text 32 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier + publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. a journal article webpage https:// url 33 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE publication citation https://schema.org/distribution + notesText Notes Additional information about the Dataset textbox 34 FALSE FALSE FALSE FALSE TRUE FALSE citation + language Language A language that the Dataset's files is written in text 35 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language + producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 36 FALSE FALSE TRUE FALSE FALSE FALSE citation + producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 37 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation + producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 39 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerURL URL The URL of the producer's website https:// url 40 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerLogoURL Logo URL The URL of the producer's logo https:// url 41
FALSE FALSE FALSE FALSE FALSE FALSE producer citation + productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 42 TRUE FALSE FALSE TRUE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 43 TRUE FALSE TRUE TRUE FALSE FALSE citation + contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 44 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor + contributorType Type Indicates the type of contribution made to the dataset text 45 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation + contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 46 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation + grantNumber Funding Information Information about the Dataset's financial support none 47 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor + grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 48 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 49 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 50 FALSE FALSE TRUE FALSE FALSE FALSE citation + distributorName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 51 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation + distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 53 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorURL URL The URL of the distributor's webpage https:// url 54 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 55
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 56 TRUE FALSE FALSE TRUE FALSE FALSE citation + depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 57 FALSE FALSE FALSE FALSE FALSE FALSE citation + dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 58 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted + timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 59 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage + timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 60 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 61 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + dateOfCollection Date of Collection The dates when the data were collected or generated none 62 ; FALSE FALSE TRUE FALSE FALSE FALSE citation + dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 64 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + kindOfData Data Type The type of data included in the files (e.g. survey data, clinical data, or machine-readable text) text 65 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData + series Series Information about the dataset series to which the Dataset belong none 66 : FALSE FALSE TRUE FALSE FALSE FALSE citation + seriesName Name The name of the dataset series text 67 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation + seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 68 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation + software Software Information about the software used to generate the Dataset none 69 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy + softwareName Name The name of software used to generate the Dataset text 70 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + softwareVersion Version The version of the software used to generate the Dataset, e.g. 
4.11 text 71 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 72 FALSE FALSE TRUE FALSE FALSE FALSE citation + relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 73 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation + otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 74 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references + dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. a book, article, serial, or machine-readable data file) textbox 75 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom + originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation + characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation + accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 78 FALSE FALSE FALSE FALSE FALSE FALSE citation #controlledVocabulary DatasetField Value identifier displayOrder subject Agricultural Sciences D01 0 subject Arts and Humanities D0 1 @@ -138,189 +139,189 @@ authorIdentifierScheme DAI 5 authorIdentifierScheme ResearcherID 6 authorIdentifierScheme ScopusID 7 - language Abkhaz 0 - language Afar 1 aar aa - language Afrikaans 2 afr af - language Akan 3 aka ak - language Albanian 4 sqi alb sq - language Amharic 5 amh am - language Arabic 6 ara ar - language Aragonese 7 arg an - language Armenian 8 hye arm hy - language Assamese 9 asm as - language Avaric 10 ava av - language Avestan 11 ave ae - language Aymara 12 aym ay - language Azerbaijani 13 aze az - language Bambara 14 bam bm - language Bashkir 15 bak ba - language Basque 16 eus baq eu - language Belarusian 17 bel be - language Bengali, Bangla 18 ben bn - language Bihari 19 bih bh - language Bislama 20 bis bi - language Bosnian 21 bos bs - language Breton 22 bre br - language Bulgarian 23 bul bg - language Burmese 24 mya bur my - language Catalan,Valencian 25 cat ca - language Chamorro 26 cha ch - language Chechen 27 che ce - language Chichewa, Chewa, Nyanja 28 nya ny - language Chinese 29 zho chi zh - language Chuvash 30 chv cv - language Cornish 31 cor kw - language Corsican 32 cos co - language Cree 33 cre cr - language Croatian 34 hrv src hr - language Czech 35 ces cze cs - language Danish 36 dan da - language Divehi, Dhivehi, Maldivian 37 div dv - language Dutch 38 nld dut nl - language Dzongkha 39 dzo dz - language English 40 eng en - language Esperanto 41 epo eo - language Estonian 42 est et - language Ewe 43 ewe ee - language Faroese 44 fao fo - language Fijian 45 fij fj - language Finnish 46 fin fi - language French 47 fra fre fr - language Fula, Fulah, Pulaar, Pular 48 ful ff - language Galician 49 glg gl - language Georgian 50 kat geo ka - language German 51 deu ger de - language 
Greek (modern) 52 gre ell el - language Guaraní 53 grn gn - language Gujarati 54 guj gu - language Haitian, Haitian Creole 55 hat ht - language Hausa 56 hau ha - language Hebrew (modern) 57 heb he - language Herero 58 her hz - language Hindi 59 hin hi - language Hiri Motu 60 hmo ho - language Hungarian 61 hun hu - language Interlingua 62 ina ia - language Indonesian 63 ind id - language Interlingue 64 ile ie - language Irish 65 gle ga - language Igbo 66 ibo ig - language Inupiaq 67 ipk ik - language Ido 68 ido io - language Icelandic 69 isl ice is - language Italian 70 ita it - language Inuktitut 71 iku iu - language Japanese 72 jpn ja - language Javanese 73 jav jv - language Kalaallisut, Greenlandic 74 kal kl - language Kannada 75 kan kn - language Kanuri 76 kau kr - language Kashmiri 77 kas ks - language Kazakh 78 kaz kk - language Khmer 79 khm km - language Kikuyu, Gikuyu 80 kik ki - language Kinyarwanda 81 kin rw - language Kyrgyz 82 - language Komi 83 kom kv - language Kongo 84 kon kg - language Korean 85 kor ko - language Kurdish 86 kur ku - language Kwanyama, Kuanyama 87 kua kj - language Latin 88 lat la - language Luxembourgish, Letzeburgesch 89 ltz lb - language Ganda 90 lug lg - language Limburgish, Limburgan, Limburger 91 lim li - language Lingala 92 lin ln - language Lao 93 lao lo - language Lithuanian 94 lit lt - language Luba-Katanga 95 lub lu - language Latvian 96 lav lv - language Manx 97 glv gv - language Macedonian 98 mkd mac mk - language Malagasy 99 mlg mg - language Malay 100 may msa ms - language Malayalam 101 mal ml - language Maltese 102 mlt mt - language Māori 103 mao mri mi - language Marathi (Marāṭhī) 104 mar mr - language Marshallese 105 mah mh - language Mixtepec Mixtec 106 mix - language Mongolian 107 mon mn - language Nauru 108 nau na - language Navajo, Navaho 109 nav nv - language Northern Ndebele 110 nde nd - language Nepali 111 nep ne - language Ndonga 112 ndo ng - language Norwegian Bokmål 113 nob nb - language Norwegian Nynorsk 114 nno nn - language Norwegian 115 nor no - language Nuosu 116 - language Southern Ndebele 117 nbl nr - language Occitan 118 oci oc - language Ojibwe, Ojibwa 119 oji oj - language Old Church Slavonic,Church Slavonic,Old Bulgarian 120 chu cu - language Oromo 121 orm om - language Oriya 122 ori or - language Ossetian, Ossetic 123 oss os - language Panjabi, Punjabi 124 pan pa - language Pāli 125 pli pi - language Persian (Farsi) 126 per fas fa - language Polish 127 pol pl - language Pashto, Pushto 128 pus ps - language Portuguese 129 por pt - language Quechua 130 que qu - language Romansh 131 roh rm - language Kirundi 132 run rn - language Romanian 133 ron rum ro - language Russian 134 rus ru - language Sanskrit (Saṁskṛta) 135 san sa - language Sardinian 136 srd sc - language Sindhi 137 snd sd - language Northern Sami 138 sme se - language Samoan 139 smo sm - language Sango 140 sag sg - language Serbian 141 srp scc sr - language Scottish Gaelic, Gaelic 142 gla gd - language Shona 143 sna sn - language Sinhala, Sinhalese 144 sin si - language Slovak 145 slk slo sk - language Slovene 146 slv sl - language Somali 147 som so - language Southern Sotho 148 sot st - language Spanish, Castilian 149 spa es - language Sundanese 150 sun su - language Swahili 151 swa sw - language Swati 152 ssw ss - language Swedish 153 swe sv - language Tamil 154 tam ta - language Telugu 155 tel te - language Tajik 156 tgk tg - language Thai 157 tha th - language Tigrinya 158 tir ti - language Tibetan Standard, Tibetan, Central 159 tib bod bo - language 
Turkmen 160 tuk tk - language Tagalog 161 tgl tl - language Tswana 162 tsn tn - language Tonga (Tonga Islands) 163 ton to - language Turkish 164 tur tr - language Tsonga 165 tso ts - language Tatar 166 tat tt - language Twi 167 twi tw - language Tahitian 168 tah ty - language Uyghur, Uighur 169 uig ug - language Ukrainian 170 ukr uk - language Urdu 171 urd ur - language Uzbek 172 uzb uz - language Venda 173 ven ve - language Vietnamese 174 vie vi - language Volapük 175 vol vo - language Walloon 176 wln wa - language Welsh 177 cym wel cy - language Wolof 178 wol wo - language Western Frisian 179 fry fy - language Xhosa 180 xho xh - language Yiddish 181 yid yi - language Yoruba 182 yor yo - language Zhuang, Chuang 183 zha za - language Zulu 184 zul zu + language Abkhaz abk 0 abk ab + language Afar aar 1 aar aa + language Afrikaans afr 2 afr af + language Akan aka 3 aka ak + language Albanian sqi 4 sqi alb sq + language Amharic amh 5 amh am + language Arabic ara 6 ara ar + language Aragonese arg 7 arg an + language Armenian hye 8 hye arm hy + language Assamese asm 9 asm as + language Avaric ava 10 ava av + language Avestan ave 11 ave ae + language Aymara aym 12 aym ay + language Azerbaijani aze 13 aze az + language Bambara bam 14 bam bm + language Bashkir bak 15 bak ba + language Basque eus 16 eus baq eu + language Belarusian bel 17 bel be + language Bengali, Bangla ben 18 ben bn Bengali Bangla + language Bihari bih 19 bih bh + language Bislama bis 20 bis bi + language Bosnian bos 21 bos bs + language Breton bre 22 bre br + language Bulgarian bul 23 bul bg + language Burmese mya 24 mya bur my + language Catalan,Valencian cat 25 cat ca Catalan Valencian + language Chamorro cha 26 cha ch + language Chechen che 27 che ce + language Chichewa, Chewa, Nyanja nya 28 nya ny Chichewa Chewa Nyanja + language Chinese zho 29 zho chi zh + language Chuvash chv 30 chv cv + language Cornish cor 31 cor kw + language Corsican cos 32 cos co + language Cree cre 33 cre cr + language Croatian hrv 34 hrv src hr + language Czech ces 35 ces cze cs + language Danish dan 36 dan da + language Divehi, Dhivehi, Maldivian div 37 div dv Divehi Dhivehi Maldivian + language Dutch nld 38 nld dut nl + language Dzongkha dzo 39 dzo dz + language English eng 40 eng en + language Esperanto epo 41 epo eo + language Estonian est 42 est et + language Ewe ewe 43 ewe ee + language Faroese fao 44 fao fo + language Fijian fij 45 fij fj + language Finnish fin 46 fin fi + language French fra 47 fra fre fr + language Fula, Fulah, Pulaar, Pular ful 48 ful ff Fula Fulah Pulaar Pular + language Galician glg 49 glg gl + language Georgian kat 50 kat geo ka + language German deu 51 deu ger de + language Greek (modern) ell 52 ell gre el Greek + language Guaraní grn 53 grn gn + language Gujarati guj 54 guj gu + language Haitian, Haitian Creole hat 55 hat ht Haitian Haitian Creole + language Hausa hau 56 hau ha + language Hebrew (modern) heb 57 heb he + language Herero her 58 her hz + language Hindi hin 59 hin hi + language Hiri Motu hmo 60 hmo ho + language Hungarian hun 61 hun hu + language Interlingua ina 62 ina ia + language Indonesian ind 63 ind id + language Interlingue ile 64 ile ie + language Irish gle 65 gle ga + language Igbo ibo 66 ibo ig + language Inupiaq ipk 67 ipk ik + language Ido ido 68 ido io + language Icelandic isl 69 isl ice is + language Italian ita 70 ita it + language Inuktitut iku 71 iku iu + language Japanese jpn 72 jpn ja + language Javanese jav 73 jav jv + language Kalaallisut, Greenlandic kal 74 kal kl Kalaallisut 
Greenlandic + language Kannada kan 75 kan kn + language Kanuri kau 76 kau kr + language Kashmiri kas 77 kas ks + language Kazakh kaz 78 kaz kk + language Khmer khm 79 khm km + language Kikuyu, Gikuyu kik 80 kik ki Kikuyu Gikuyu + language Kinyarwanda kin 81 kin rw + language Kyrgyz kir 82 kir ky Kirghiz + language Komi kom 83 kom kv + language Kongo kon 84 kon kg + language Korean kor 85 kor ko + language Kurdish kur 86 kur ku + language Kwanyama, Kuanyama kua 87 kua kj Kwanyama Kuanyama + language Latin lat 88 lat la + language Luxembourgish, Letzeburgesch ltz 89 ltz lb Luxembourgish Letzeburgesch + language Ganda lug 90 lug lg + language Limburgish, Limburgan, Limburger lim 91 lim li Limburgish Limburgan Limburger + language Lingala lin 92 lin ln + language Lao lao 93 lao lo + language Lithuanian lit 94 lit lt + language Luba-Katanga lub 95 lub lu + language Latvian lav 96 lav lv + language Manx glv 97 glv gv + language Macedonian mkd 98 mkd mac mk + language Malagasy mlg 99 mlg mg + language Malay msa 100 msa may ms + language Malayalam mal 101 mal ml + language Maltese mlt 102 mlt mt + language Māori mri 103 mri mao mi Maori + language Marathi (Marāṭhī) mar 104 mar mr + language Marshallese mah 105 mah mh + language Mixtepec Mixtec mix 106 mix + language Mongolian mon 107 mon mn + language Nauru nau 108 nau na + language Navajo, Navaho nav 109 nav nv Navajo Navaho + language Northern Ndebele nde 110 nde nd + language Nepali nep 111 nep ne + language Ndonga ndo 112 ndo ng + language Norwegian Bokmål nob 113 nob nb + language Norwegian Nynorsk nno 114 nno nn + language Norwegian nor 115 nor no + language Nuosu iii 116 iii ii Sichuan Yi + language Southern Ndebele nbl 117 nbl nr + language Occitan oci 118 oci oc + language Ojibwe, Ojibwa oji 119 oji oj Ojibwe Ojibwa + language Old Church Slavonic,Church Slavonic,Old Bulgarian chu 120 chu cu + language Oromo orm 121 orm om + language Oriya ori 122 ori or + language Ossetian, Ossetic oss 123 oss os Ossetian Ossetic + language Panjabi, Punjabi pan 124 pan pa Panjabi Punjabi + language Pāli pli 125 pli pi + language Persian (Farsi) fas 126 fas per fa + language Polish pol 127 pol pl + language Pashto, Pushto pus 128 pus ps Pashto Pushto + language Portuguese por 129 por pt + language Quechua que 130 que qu + language Romansh roh 131 roh rm + language Kirundi run 132 run rn + language Romanian ron 133 ron rum ro + language Russian rus 134 rus ru + language Sanskrit (Saṁskṛta) san 135 san sa + language Sardinian srd 136 srd sc + language Sindhi snd 137 snd sd + language Northern Sami sme 138 sme se + language Samoan smo 139 smo sm + language Sango sag 140 sag sg + language Serbian srp 141 srp scc sr + language Scottish Gaelic, Gaelic gla 142 gla gd Scottish Gaelic Gaelic + language Shona sna 143 sna sn + language Sinhala, Sinhalese sin 144 sin si Sinhala Sinhalese + language Slovak slk 145 slk slo sk + language Slovene slv 146 slv sl Slovenian + language Somali som 147 som so + language Southern Sotho sot 148 sot st + language Spanish, Castilian spa 149 spa es Spanish Castilian + language Sundanese sun 150 sun su + language Swahili swa 151 swa sw + language Swati ssw 152 ssw ss + language Swedish swe 153 swe sv + language Tamil tam 154 tam ta + language Telugu tel 155 tel te + language Tajik tgk 156 tgk tg + language Thai tha 157 tha th + language Tigrinya tir 158 tir ti + language Tibetan Standard, Tibetan, Central bod 159 bod tib bo Tibetan Standard Tibetan Central + language Turkmen tuk 160 tuk tk + language Tagalog tgl 161 tgl tl + language 
Tswana tsn 162 tsn tn + language Tonga (Tonga Islands) ton 163 ton to Tonga + language Turkish tur 164 tur tr + language Tsonga tso 165 tso ts + language Tatar tat 166 tat tt + language Twi twi 167 twi tw + language Tahitian tah 168 tah ty + language Uyghur, Uighur uig 169 uig ug Uyghur Uighur + language Ukrainian ukr 170 ukr uk + language Urdu urd 171 urd ur + language Uzbek uzb 172 uzb uz + language Venda ven 173 ven ve + language Vietnamese vie 174 vie vi + language Volapük vol 175 vol vo + language Walloon wln 176 wln wa + language Welsh cym 177 cym wel cy + language Wolof wol 178 wol wo + language Western Frisian fry 179 fry fy + language Xhosa xho 180 xho xh + language Yiddish yid 181 yid yi + language Yoruba yor 182 yor yo + language Zhuang, Chuang zha 183 zha za Zhuang Chuang + language Zulu zul 184 zul zu language Not applicable 185 diff --git a/scripts/api/data/metadatablocks/computational_workflow.tsv b/scripts/api/data/metadatablocks/computational_workflow.tsv index 51b69cfdb80..3cd0c26a464 100644 --- a/scripts/api/data/metadatablocks/computational_workflow.tsv +++ b/scripts/api/data/metadatablocks/computational_workflow.tsv @@ -2,7 +2,7 @@ computationalworkflow Computational Workflow Metadata #datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id termURI workflowType Computational Workflow Type The kind of Computational Workflow, which is designed to compose and execute a series of computational or data manipulation steps in a scientific application text 0 TRUE TRUE TRUE TRUE TRUE FALSE computationalworkflow - workflowCodeRepository External Code Repository URL A link to the repository where the un-compiled, human readable code and related code is located (e.g. GitHub, GitLab, SVN) https://... url 1 FALSE FALSE TRUE FALSE TRUE FALSE computationalworkflow + workflowCodeRepository External Code Repository URL A link to the repository where the un-compiled, human readable code and related code is located (e.g. GitHub, GitLab, SVN) https://... url 1 #VALUE FALSE FALSE TRUE FALSE TRUE FALSE computationalworkflow workflowDocumentation Documentation A link (URL) to the documentation or text describing the Computational Workflow and its use textbox 2 FALSE FALSE TRUE FALSE TRUE FALSE computationalworkflow #controlledVocabulary DatasetField Value identifier displayOrder workflowType Common Workflow Language (CWL) workflowtype_cwl 1 diff --git a/scripts/api/data/metadatablocks/geospatial.tsv b/scripts/api/data/metadatablocks/geospatial.tsv index a3a8e7efd58..11408317410 100644 --- a/scripts/api/data/metadatablocks/geospatial.tsv +++ b/scripts/api/data/metadatablocks/geospatial.tsv @@ -8,10 +8,10 @@ otherGeographicCoverage Other Other information on the geographic coverage of the data. text 4 #VALUE, FALSE FALSE FALSE TRUE FALSE FALSE geographicCoverage geospatial geographicUnit Geographic Unit Lowest level of geographic aggregation covered by the Dataset, e.g., village, county, region. text 5 TRUE FALSE TRUE TRUE FALSE FALSE geospatial geographicBoundingBox Geographic Bounding Box The fundamental geometric description for any Dataset that models geography is the geographic bounding box. It describes the minimum box, defined by west and east longitudes and north and south latitudes, which includes the largest geographic extent of the Dataset's geographic coverage. This element is used in the first pass of a coordinate-based search. 
Inclusion of this element in the codebook is recommended, but is required if the bound polygon box is included. none 6 FALSE FALSE TRUE FALSE FALSE FALSE geospatial - westLongitude West Longitude Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= West Bounding Longitude Value <= 180,0. text 7 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - eastLongitude East Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. text 8 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - northLongitude North Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. text 9 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial - southLongitude South Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. text 10 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + westLongitude Westernmost (Left) Longitude Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180.0 <= West Bounding Longitude Value <= 180.0. text 7 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + eastLongitude Easternmost (Right) Longitude Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180.0 <= East Bounding Longitude Value <= 180.0. text 8 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + northLatitude Northernmost (Top) Latitude Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90.0 <= North Bounding Latitude Value <= 90.0. text 9 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial + southLatitude Southernmost (Bottom) Latitude Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90.0 <= South Bounding Latitude Value <= 90.0. 
text 10 FALSE FALSE FALSE FALSE FALSE FALSE geographicBoundingBox geospatial #controlledVocabulary DatasetField Value identifier displayOrder country Afghanistan 0 country Albania 1 diff --git a/scripts/api/data/storageSites/add-storage-site.json b/scripts/api/data/storageSites/add-storage-site.json deleted file mode 100644 index d13ec2f165d..00000000000 --- a/scripts/api/data/storageSites/add-storage-site.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "hostname": "dataverse.librascholar.edu", - "name": "LibraScholar, USA", - "primaryStorage": true, - "transferProtocols": "rsync,posix,globus" -} diff --git a/scripts/api/setup-all.sh b/scripts/api/setup-all.sh index e247caa72b5..b7f962209e4 100755 --- a/scripts/api/setup-all.sh +++ b/scripts/api/setup-all.sh @@ -57,10 +57,6 @@ echo "- Allow internal signup" curl -X PUT -d yes "${DATAVERSE_URL}/api/admin/settings/:AllowSignUp" curl -X PUT -d "/dataverseuser.xhtml?editMode=CREATE" "${DATAVERSE_URL}/api/admin/settings/:SignUpUrl" -curl -X PUT -d doi "${DATAVERSE_URL}/api/admin/settings/:Protocol" -curl -X PUT -d 10.5072 "${DATAVERSE_URL}/api/admin/settings/:Authority" -curl -X PUT -d "FK2/" "${DATAVERSE_URL}/api/admin/settings/:Shoulder" -curl -X PUT -d DataCite "${DATAVERSE_URL}/api/admin/settings/:DoiProvider" curl -X PUT -d burrito "${DATAVERSE_URL}/api/admin/settings/BuiltinUsers.KEY" curl -X PUT -d localhost-only "${DATAVERSE_URL}/api/admin/settings/:BlockedApiPolicy" curl -X PUT -d 'native/http' "${DATAVERSE_URL}/api/admin/settings/:UploadMethods" @@ -69,7 +65,7 @@ echo echo "Setting up the admin user (and as superuser)" adminResp=$(curl -s -H "Content-type:application/json" -X POST -d @"$SCRIPT_PATH"/data/user-admin.json "${DATAVERSE_URL}/api/builtin-users?password=$DV_SU_PASSWORD&key=burrito") echo "$adminResp" -curl -X POST "${DATAVERSE_URL}/api/admin/superuser/dataverseAdmin" +curl -X PUT "${DATAVERSE_URL}/api/admin/superuser/dataverseAdmin" -d "true" echo echo "Setting up the root dataverse" diff --git a/scripts/api/setup-optional-harvard.sh b/scripts/api/setup-optional-harvard.sh index fcbcc08a8e6..1311464e8ff 100755 --- a/scripts/api/setup-optional-harvard.sh +++ b/scripts/api/setup-optional-harvard.sh @@ -3,6 +3,7 @@ SERVER=http://localhost:8080/api echo "Setting up Harvard-specific settings" # :Authority and :Shoulder are commented out so this script can be used on test servers +# Should now use the new multipid JVM options instead of these settings #curl -X PUT -d 10.7910 "$SERVER/admin/settings/:Authority" #curl -X PUT -d "DVN/" "$SERVER/admin/settings/:Shoulder" echo "- Application Status header" diff --git a/scripts/deploy/phoenix.dataverse.org/post b/scripts/deploy/phoenix.dataverse.org/post index e4c8817844b..9d37c183a1a 100755 --- a/scripts/deploy/phoenix.dataverse.org/post +++ b/scripts/deploy/phoenix.dataverse.org/post @@ -4,7 +4,6 @@ cd scripts/api cd ../.. psql -U dvnapp dvndb -f scripts/database/reference_data.sql psql -U dvnapp dvndb -f doc/sphinx-guides/source/_static/util/createsequence.sql -curl http://localhost:8080/api/admin/settings/:DoiProvider -X PUT -d FAKE scripts/search/tests/publish-dataverse-root git checkout scripts/api/data/dv-root.json scripts/search/tests/grant-authusers-add-on-root diff --git a/scripts/dev/dev-rebuild.sh b/scripts/dev/dev-rebuild.sh index 9eae195b135..898212b4664 100755 --- a/scripts/dev/dev-rebuild.sh +++ b/scripts/dev/dev-rebuild.sh @@ -56,9 +56,6 @@ cd ../.. echo "Creating SQL sequence..." 
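The FAKE :DoiProvider database setting being dropped from these deploy and dev scripts is superseded by the JVM-option based PID configuration that scripts/installer/as-setup.sh now applies (see below). As a minimal standalone sketch, not part of this patch, the same fake provider could be configured against a running Payara with asadmin on the PATH, reusing the property names from this diff (the leading dash is backslash-escaped, following the as-setup.sh convention, so asadmin does not parse it as an option):

# Configure the fake (non-resolving) DOI provider, mirroring scripts/installer/as-setup.sh
./asadmin create-jvm-options "\-Ddataverse.pid.providers=fake"
./asadmin create-jvm-options "\-Ddataverse.pid.default-provider=fake"
./asadmin create-jvm-options "\-Ddataverse.pid.fake.type=FAKE"
./asadmin create-jvm-options "\-Ddataverse.pid.fake.label=Fake DOI Provider"
./asadmin create-jvm-options "\-Ddataverse.pid.fake.authority=10.5072"
./asadmin create-jvm-options "\-Ddataverse.pid.fake.shoulder=FK2/"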
psql -h localhost -U $DB_USER $DB_NAME -f doc/sphinx-guides/source/_static/util/createsequence.sql -echo "Setting DOI provider to \"FAKE\"..." -curl http://localhost:8080/api/admin/settings/:DoiProvider -X PUT -d FAKE - echo "Allowing GUI edits to be visible without redeploy..." $PAYARA_DIR/glassfish/bin/asadmin create-system-properties "dataverse.jsf.refresh-period=1" diff --git a/scripts/dev/docker-final-setup.sh b/scripts/dev/docker-final-setup.sh index d2453619ec2..e20ce7ad6b6 100755 --- a/scripts/dev/docker-final-setup.sh +++ b/scripts/dev/docker-final-setup.sh @@ -10,9 +10,6 @@ cd ../.. echo "Setting system mail address..." curl -X PUT -d "dataverse@localhost" "http://localhost:8080/api/admin/settings/:SystemEmail" -echo "Setting DOI provider to \"FAKE\"..." -curl "http://localhost:8080/api/admin/settings/:DoiProvider" -X PUT -d FAKE - API_TOKEN=$(grep apiToken "/tmp/setup-all.sh.out" | jq ".data.apiToken" | tr -d \") export API_TOKEN diff --git a/scripts/installer/Makefile b/scripts/installer/Makefile index 399bc65168a..8ea95534986 100644 --- a/scripts/installer/Makefile +++ b/scripts/installer/Makefile @@ -55,13 +55,13 @@ ${JHOVE_SCHEMA}: ../../conf/jhove/jhoveConfig.xsd ${INSTALLER_ZIP_DIR} @echo copying jhove schema file /bin/cp ../../conf/jhove/jhoveConfig.xsd ${INSTALLER_ZIP_DIR} -${SOLR_SCHEMA}: ../../conf/solr/9.3.0/schema.xml ../../conf/solr/9.3.0/update-fields.sh ${INSTALLER_ZIP_DIR} +${SOLR_SCHEMA}: ../../conf/solr/schema.xml ../../conf/solr/update-fields.sh ${INSTALLER_ZIP_DIR} @echo copying Solr schema file - /bin/cp ../../conf/solr/9.3.0/schema.xml ../../conf/solr/9.3.0/update-fields.sh ${INSTALLER_ZIP_DIR} + /bin/cp ../../conf/solr/schema.xml ../../conf/solr/update-fields.sh ${INSTALLER_ZIP_DIR} -${SOLR_CONFIG}: ../../conf/solr/9.3.0/solrconfig.xml ${INSTALLER_ZIP_DIR} +${SOLR_CONFIG}: ../../conf/solr/solrconfig.xml ${INSTALLER_ZIP_DIR} @echo copying Solr config file - /bin/cp ../../conf/solr/9.3.0/solrconfig.xml ${INSTALLER_ZIP_DIR} + /bin/cp ../../conf/solr/solrconfig.xml ${INSTALLER_ZIP_DIR} ${PYTHON_FILES}: README_python.txt install.py installConfig.py installAppServer.py installUtils.py requirements.txt default.config interactive.config ${INSTALLER_ZIP_DIR} @echo copying Python installer files diff --git a/scripts/installer/as-setup.sh b/scripts/installer/as-setup.sh index fc5b378cff5..e87122ba77c 100755 --- a/scripts/installer/as-setup.sh +++ b/scripts/installer/as-setup.sh @@ -102,23 +102,22 @@ function preliminary_setup() # password reset token timeout in minutes ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.auth.password-reset-timeout-in-minutes=60" - # DataCite DOI Settings + # Fake DOI Settings # (we can no longer offer EZID with their shared test account) # jvm-options use colons as separators, escape as literal DOI_BASEURL_ESC=`echo $DOI_BASEURL | sed -e 's/:/\\\:/'` - ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.datacite.username=${DOI_USERNAME}" - ./asadmin $ASADMIN_OPTS create-jvm-options '\-Ddataverse.pid.datacite.password=${ALIAS=doi_password_alias}' - ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.datacite.mds-api-url=$DOI_BASEURL_ESC" - + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.providers=fake" + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.fake.type=FAKE" + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.fake.label=Fake DOI Provider" + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.fake.authority=10.5072" + ./asadmin $ASADMIN_OPTS 
create-jvm-options "\-Ddataverse.pid.fake.shoulder=FK2/" + ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.default-provider=fake" # jvm-options use colons as separators, escape as literal - DOI_DATACITERESTAPIURL_ESC=`echo $DOI_DATACITERESTAPIURL | sed -e 's/:/\\\:/'` - ./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.datacite.rest-api-url=$DOI_DATACITERESTAPIURL_ESC" + #DOI_DATACITERESTAPIURL_ESC=`echo $DOI_DATACITERESTAPIURL | sed -e 's/:/\\\:/'` + #./asadmin $ASADMIN_OPTS create-jvm-options "\-Ddataverse.pid.testDC.datacite.rest-api-url=$DOI_DATACITERESTAPIURL_ESC" ./asadmin $ASADMIN_OPTS create-jvm-options "-Ddataverse.timerServer=true" - # Workaround for FISH-7722: Failed to deploy war with @Stateless https://github.com/payara/Payara/issues/6337 - ./asadmin $ASADMIN_OPTS create-jvm-options --add-opens=java.base/java.io=ALL-UNNAMED - # enable comet support ./asadmin $ASADMIN_OPTS set server-config.network-config.protocols.protocol.http-listener-1.http.comet-support-enabled="true" @@ -146,12 +145,10 @@ function final_setup(){ # delete any existing mail/notifyMailSession; configure port, if provided: ./asadmin delete-javamail-resource mail/notifyMailSession - - if [ $SMTP_SERVER_PORT"x" != "x" ] - then - ./asadmin $ASADMIN_OPTS create-javamail-resource --mailhost "$SMTP_SERVER" --mailuser "dataversenotify" --fromaddress "do-not-reply@${HOST_ADDRESS}" --property mail.smtp.port="${SMTP_SERVER_PORT}" mail/notifyMailSession - else - ./asadmin $ASADMIN_OPTS create-javamail-resource --mailhost "$SMTP_SERVER" --mailuser "dataversenotify" --fromaddress "do-not-reply@${HOST_ADDRESS}" mail/notifyMailSession + ./asadmin $ASADMIN_OPTS create-system-properties "dataverse.mail.system-email='${ADMIN_EMAIL}'" + ./asadmin $ASADMIN_OPTS create-system-properties "dataverse.mail.mta.host='${SMTP_SERVER}'" + if [ "x${SMTP_SERVER_PORT}" != "x" ]; then + ./asadmin $ASADMIN_OPTS create-system-properties "dataverse.mail.mta.port='${SMTP_SERVER_PORT}'" fi } @@ -279,6 +276,12 @@ if [ ! -d "$DOMAIN_DIR" ] exit 2 fi +if [ -z "$ADMIN_EMAIL" ] + then + echo "You must specify the system admin email address (ADMIN_EMAIL)." + exit 1 +fi + echo "Setting up your app. server (Payara) to support Dataverse" echo "Payara directory: "$GLASSFISH_ROOT echo "Domain directory: "$DOMAIN_DIR diff --git a/scripts/installer/install.py b/scripts/installer/install.py index 5a7b9f75696..005fbad46e0 100644 --- a/scripts/installer/install.py +++ b/scripts/installer/install.py @@ -422,9 +422,13 @@ conn.close() if int(pg_major_version) >= 15: + admin_conn_string = "dbname='"+pgDb+"' user='postgres' password='"+pgAdminPassword+"' host='"+pgHost+"'" + conn = psycopg2.connect(admin_conn_string) + conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + cur = conn.cursor() conn_cmd = "GRANT CREATE ON SCHEMA public TO "+pgUser+";" - print("PostgreSQL 15 or higher detected. Running " + conn_cmd) try: + print("PostgreSQL 15 or higher detected. Running " + conn_cmd) cur.execute(conn_cmd) except: if force: @@ -564,14 +568,6 @@ except: sys.exit("Failure to execute setup-all.sh! aborting.") -# 7b. configure admin email in the application settings -print("configuring system email address...") -returnCode = subprocess.call(["curl", "-X", "PUT", "-d", adminEmail, apiUrl+"/admin/settings/:SystemEmail"]) -if returnCode != 0: - print("\nWARNING: failed to configure the admin email in the Dataverse settings!") -else: - print("\ndone.") - # 8c. 
configure remote Solr location, if specified if solrLocation != "LOCAL": print("configuring remote Solr location... ("+solrLocation+")") @@ -587,15 +583,14 @@ print("\n\nYou should now have a running Dataverse instance at") print(" http://" + hostName + ":8080\n\n") -# DataCite instructions: +# PID instructions: -print("\nYour Dataverse has been configured to use DataCite, to register DOI global identifiers in the ") +print("\nYour Dataverse has been configured to use a Fake DOI Provider, registering (non-resolvable) DOI global identifiers in the ") print("test name space \"10.5072\" with the \"shoulder\" \"FK2\"") -print("However, you have to contact DataCite (support\@datacite.org) and request a test account, before you ") -print("can publish datasets. Once you receive the account name and password, add them to your domain.xml,") -print("as the following two JVM options:") -print("\t-Ddataverse.pid.datacite.username=...") -print("\t-Ddataverse.pid.datacite.password=...") +print("You can reconfigure to use additional/alternative providers.") +print("If you intend to use DOIs, you should contact DataCite (support\@datacite.org) or GDCC (see https://www.gdcc.io/about.html) and request a test account.") +print("Once you receive the account information (name, password, authority, shoulder), add them to your configuration ") +print("as described in the Dataverse Guides (see https://guides.dataverse.org/en/latest/installation/config.html#persistent-identifiers-and-publishing-datasets),") print("and restart payara") print("If this is a production Dataverse and you are planning to register datasets as ") print("\"real\", non-test DOIs or Handles, consult the \"Persistent Identifiers and Publishing Datasets\"") diff --git a/scripts/installer/installAppServer.py b/scripts/installer/installAppServer.py index 698f5ba9a58..7636490c583 100644 --- a/scripts/installer/installAppServer.py +++ b/scripts/installer/installAppServer.py @@ -6,8 +6,9 @@ def runAsadminScript(config): # commands to set up all the app. server (payara6) components for the application. # All the parameters must be passed to that script as environmental # variables: - os.environ['GLASSFISH_DOMAIN'] = "domain1"; - os.environ['ASADMIN_OPTS'] = ""; + os.environ['GLASSFISH_DOMAIN'] = "domain1" + os.environ['ASADMIN_OPTS'] = "" + os.environ['ADMIN_EMAIL'] = config.get('system','ADMIN_EMAIL') os.environ['HOST_ADDRESS'] = config.get('glassfish','HOST_DNS_ADDRESS') os.environ['GLASSFISH_ROOT'] = config.get('glassfish','GLASSFISH_DIRECTORY') diff --git a/src/main/docker/assembly.xml b/src/main/docker/assembly.xml index 9f9b39617a3..62cd910ef9b 100644 --- a/src/main/docker/assembly.xml +++ b/src/main/docker/assembly.xml @@ -3,7 +3,7 @@ - target/${project.artifactId}-${project.version} + target/${project.artifactId} app WEB-INF/lib/**/* @@ -11,7 +11,7 @@ - target/${project.artifactId}-${project.version}/WEB-INF/lib + target/${project.artifactId}/WEB-INF/lib deps diff --git a/src/main/docker/scripts/init_2_configure.sh b/src/main/docker/scripts/init_2_configure.sh index a98f08088c1..b31cfac37b7 100755 --- a/src/main/docker/scripts/init_2_configure.sh +++ b/src/main/docker/scripts/init_2_configure.sh @@ -31,10 +31,6 @@ echo "# Dataverse postboot configuration for Payara" > "${DV_POSTBOOT}" # EE 8 code annotations or at least glassfish-resources.xml # NOTE: postboot commands is not multi-line capable, thus spaghetti needed. -# JavaMail -echo "INFO: Defining JavaMail." 
-echo "create-javamail-resource --mailhost=${DATAVERSE_MAIL_HOST:-smtp} --mailuser=${DATAVERSE_MAIL_USER:-dataversenotify} --fromaddress=${DATAVERSE_MAIL_FROM:-dataverse@localhost} mail/notifyMailSession" >> "${DV_POSTBOOT}" - # 3. Domain based configuration options # Set Dataverse environment variables echo "INFO: Defining system properties for Dataverse configuration options." diff --git a/src/main/docker/scripts/init_3_wait_dataverse_db_host.sh b/src/main/docker/scripts/init_3_wait_dataverse_db_host.sh new file mode 100644 index 00000000000..c234ad33307 --- /dev/null +++ b/src/main/docker/scripts/init_3_wait_dataverse_db_host.sh @@ -0,0 +1,4 @@ +#It was reported on 9949 that on the first launch of the containers Dataverse would not be deployed on payara +#this was caused by a race condition due postgress not being ready. A solion for docker compose was prepared +#but didn't work due a compatibility issue on the Maven pluggin [https://github.com/fabric8io/docker-maven-plugin/issues/888] +wait-for "${DATAVERSE_DB_HOST:-postgres}:${DATAVERSE_DB_PORT:-5432}" -t 120 \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java deleted file mode 100644 index f1bfc3e290b..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/AbstractGlobalIdServiceBean.java +++ /dev/null @@ -1,700 +0,0 @@ -package edu.harvard.iq.dataverse; - -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.SystemConfig; -import java.io.InputStream; -import jakarta.ejb.EJB; -import jakarta.inject.Inject; -import java.util.*; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.commons.lang3.RandomStringUtils; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; - -public abstract class AbstractGlobalIdServiceBean implements GlobalIdServiceBean { - - private static final Logger logger = Logger.getLogger(AbstractGlobalIdServiceBean.class.getCanonicalName()); - - @Inject - DataverseServiceBean dataverseService; - @EJB - protected - SettingsServiceBean settingsService; - @Inject - protected - DvObjectServiceBean dvObjectService; - @Inject - SystemConfig systemConfig; - - protected Boolean configured = null; - - public static String UNAVAILABLE = ":unav"; - - @Override - public Map getMetadataForCreateIndicator(DvObject dvObjectIn) { - logger.log(Level.FINE,"getMetadataForCreateIndicator(DvObject)"); - Map metadata = new HashMap<>(); - metadata = addBasicMetadata(dvObjectIn, metadata); - metadata.put("datacite.publicationyear", generateYear(dvObjectIn)); - metadata.put("_target", getTargetUrl(dvObjectIn)); - return metadata; - } - - protected Map getUpdateMetadata(DvObject dvObjectIn) { - logger.log(Level.FINE,"getUpdateMetadataFromDataset"); - Map metadata = new HashMap<>(); - metadata = addBasicMetadata(dvObjectIn, metadata); - return metadata; - } - - protected Map addBasicMetadata(DvObject dvObjectIn, Map metadata) { - - String authorString = dvObjectIn.getAuthorString(); - if (authorString.isEmpty() || authorString.contains(DatasetField.NA_VALUE)) { - authorString = UNAVAILABLE; - } - - String producerString = dataverseService.getRootDataverseName(); - - if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { - producerString = UNAVAILABLE; - } - - String titleString = dvObjectIn.getCurrentName(); - - if 
(titleString.isEmpty() || titleString.equals(DatasetField.NA_VALUE)) { - titleString = UNAVAILABLE; - } - - metadata.put("datacite.creator", authorString); - metadata.put("datacite.title", titleString); - metadata.put("datacite.publisher", producerString); - metadata.put("datacite.publicationyear", generateYear(dvObjectIn)); - return metadata; - } - - protected Map addDOIMetadataForDestroyedDataset(DvObject dvObjectIn) { - Map metadata = new HashMap<>(); - String authorString = UNAVAILABLE; - String producerString = UNAVAILABLE; - String titleString = "This item has been removed from publication"; - - metadata.put("datacite.creator", authorString); - metadata.put("datacite.title", titleString); - metadata.put("datacite.publisher", producerString); - metadata.put("datacite.publicationyear", "9999"); - return metadata; - } - - protected String getTargetUrl(DvObject dvObjectIn) { - logger.log(Level.FINE,"getTargetUrl"); - return systemConfig.getDataverseSiteUrl() + dvObjectIn.getTargetUrl() + dvObjectIn.getGlobalId().asString(); - } - - @Override - public String getIdentifier(DvObject dvObject) { - GlobalId gid = dvObject.getGlobalId(); - return gid != null ? gid.asString() : null; - } - - protected String generateYear (DvObject dvObjectIn){ - return dvObjectIn.getYearPublishedCreated(); - } - - public Map getMetadataForTargetURL(DvObject dvObject) { - logger.log(Level.FINE,"getMetadataForTargetURL"); - HashMap metadata = new HashMap<>(); - metadata.put("_target", getTargetUrl(dvObject)); - return metadata; - } - - @Override - public boolean alreadyRegistered(DvObject dvo) throws Exception { - if(dvo==null) { - logger.severe("Null DvObject sent to alreadyRegistered()."); - return false; - } - GlobalId globalId = dvo.getGlobalId(); - if(globalId == null) { - return false; - } - return alreadyRegistered(globalId, false); - } - - public abstract boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) throws Exception; - - /* - * ToDo: the DvObject being sent in provides partial support for the case where - * it has a different authority/protocol than what is configured (i.e. a legacy - * Pid that can actually be updated by the Pid account being used.) Removing - * this now would potentially break/make it harder to handle that case prior to - * support for configuring multiple Pid providers. Once that exists, it would be - * cleaner to always find the PidProvider associated with the - * protocol/authority/shoulder of the current dataset and then not pass the - * DvObject as a param. (This would also remove calls to get the settings since - * that would be done at construction.) - */ - @Override - public DvObject generateIdentifier(DvObject dvObject) { - - String protocol = dvObject.getProtocol() == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Protocol) : dvObject.getProtocol(); - String authority = dvObject.getAuthority() == null ? 
settingsService.getValueForKey(SettingsServiceBean.Key.Authority) : dvObject.getAuthority(); - if (dvObject.isInstanceofDataset()) { - dvObject.setIdentifier(generateDatasetIdentifier((Dataset) dvObject)); - } else { - dvObject.setIdentifier(generateDataFileIdentifier((DataFile) dvObject)); - } - if (dvObject.getProtocol() == null) { - dvObject.setProtocol(protocol); - } - if (dvObject.getAuthority() == null) { - dvObject.setAuthority(authority); - } - return dvObject; - } - - //ToDo just send the DvObject.DType - public String generateDatasetIdentifier(Dataset dataset) { - //ToDo - track these in the bean - String identifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); - String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, ""); - - switch (identifierType) { - case "randomString": - return generateIdentifierAsRandomString(dataset, shoulder); - case "storedProcGenerated": - return generateIdentifierFromStoredProcedureIndependent(dataset, shoulder); - default: - /* Should we throw an exception instead?? -- L.A. 4.6.2 */ - return generateIdentifierAsRandomString(dataset, shoulder); - } - } - - - /** - * Check that a identifier entered by the user is unique (not currently used - * for any other study in this Dataverse Network) also check for duplicate - * in EZID if needed - * @param userIdentifier - * @param dataset - * @return {@code true} if the identifier is unique, {@code false} otherwise. - */ - public boolean isGlobalIdUnique(GlobalId globalId) { - if ( ! dvObjectService.isGlobalIdLocallyUnique(globalId) ) { - return false; // duplication found in local database - } - - // not in local DB, look in the persistent identifier service - try { - return ! alreadyRegistered(globalId, false); - } catch (Exception e){ - //we can live with failure - means identifier not found remotely - } - - return true; - } - - /** - * Parse a Persistent Id and set the protocol, authority, and identifier - * - * Example 1: doi:10.5072/FK2/BYM3IW - * protocol: doi - * authority: 10.5072 - * identifier: FK2/BYM3IW - * - * Example 2: hdl:1902.1/111012 - * protocol: hdl - * authority: 1902.1 - * identifier: 111012 - * - * @param identifierString - * @param separator the string that separates the authority from the identifier. - * @param destination the global id that will contain the parsed data. - * @return {@code destination}, after its fields have been updated, or - * {@code null} if parsing failed. 
- */ - @Override - public GlobalId parsePersistentId(String fullIdentifierString) { - if(!isConfigured()) { - return null; - } - // Occasionally, the protocol separator character ':' comes in still - // URL-encoded as %3A (usually as a result of the URL having been - // encoded twice): - fullIdentifierString = fullIdentifierString.replace("%3A", ":"); - - int index1 = fullIdentifierString.indexOf(':'); - if (index1 > 0) { // ':' found with one or more characters before it - String protocol = fullIdentifierString.substring(0, index1); - GlobalId globalId = parsePersistentId(protocol, fullIdentifierString.substring(index1+1)); - return globalId; - } - logger.log(Level.INFO, "Error parsing identifier: {0}: '':'' not found in string", fullIdentifierString); - return null; - } - - protected GlobalId parsePersistentId(String protocol, String identifierString) { - if(!isConfigured()) { - return null; - } - String authority; - String identifier; - if (identifierString == null) { - return null; - } - int index = identifierString.indexOf('/'); - if (index > 0 && (index + 1) < identifierString.length()) { - // '/' found with one or more characters - // before and after it - // Strip any whitespace, ; and ' from authority (should finding them cause a - // failure instead?) - authority = GlobalIdServiceBean.formatIdentifierString(identifierString.substring(0, index)); - if (GlobalIdServiceBean.testforNullTerminator(authority)) { - return null; - } - identifier = GlobalIdServiceBean.formatIdentifierString(identifierString.substring(index + 1)); - if (GlobalIdServiceBean.testforNullTerminator(identifier)) { - return null; - } - } else { - logger.log(Level.INFO, "Error parsing identifier: {0}: '':/'' not found in string", - identifierString); - return null; - } - return parsePersistentId(protocol, authority, identifier); - } - - public GlobalId parsePersistentId(String protocol, String authority, String identifier) { - if(!isConfigured()) { - return null; - } - logger.fine("Parsing: " + protocol + ":" + authority + getSeparator() + identifier + " in " + getProviderInformation().get(0)); - if(!GlobalIdServiceBean.isValidGlobalId(protocol, authority, identifier)) { - return null; - } - return new GlobalId(protocol, authority, identifier, getSeparator(), getUrlPrefix(), - getProviderInformation().get(0)); - } - - - public String getSeparator() { - //The standard default - return "/"; - } - - @Override - public String generateDataFileIdentifier(DataFile datafile) { - String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); - String doiDataFileFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, SystemConfig.DataFilePIDFormat.DEPENDENT.toString()); - - String prepend = ""; - if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.DEPENDENT.toString())){ - //If format is dependent then pre-pend the dataset identifier - prepend = datafile.getOwner().getIdentifier() + "/"; - datafile.setProtocol(datafile.getOwner().getProtocol()); - datafile.setAuthority(datafile.getOwner().getAuthority()); - } else { - //If there's a shoulder prepend independent identifiers with it - prepend = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, ""); - datafile.setProtocol(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol)); - datafile.setAuthority(settingsService.getValueForKey(SettingsServiceBean.Key.Authority)); - } - - switch (doiIdentifierType) { - case "randomString": - return 
generateIdentifierAsRandomString(datafile, prepend); - case "storedProcGenerated": - if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.INDEPENDENT.toString())){ - return generateIdentifierFromStoredProcedureIndependent(datafile, prepend); - } else { - return generateIdentifierFromStoredProcedureDependent(datafile, prepend); - } - default: - /* Should we throw an exception instead?? -- L.A. 4.6.2 */ - return generateIdentifierAsRandomString(datafile, prepend); - } - } - - - /* - * This method checks locally for a DvObject with the same PID and if that is OK, checks with the PID service. - * @param dvo - the object to check (ToDo - get protocol/authority from this PidProvider object) - * @param prepend - for Datasets, this is always the shoulder, for DataFiles, it could be the shoulder or the parent Dataset identifier - */ - private String generateIdentifierAsRandomString(DvObject dvo, String prepend) { - String identifier = null; - do { - identifier = prepend + RandomStringUtils.randomAlphanumeric(6).toUpperCase(); - } while (!isGlobalIdUnique(new GlobalId(dvo.getProtocol(), dvo.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getProviderInformation().get(0)))); - - return identifier; - } - - /* - * This method checks locally for a DvObject with the same PID and if that is OK, checks with the PID service. - * @param dvo - the object to check (ToDo - get protocol/authority from this PidProvider object) - * @param prepend - for Datasets, this is always the shoulder, for DataFiles, it could be the shoulder or the parent Dataset identifier - */ - - private String generateIdentifierFromStoredProcedureIndependent(DvObject dvo, String prepend) { - String identifier; - do { - String identifierFromStoredProcedure = dvObjectService.generateNewIdentifierByStoredProcedure(); - // some diagnostics here maybe - is it possible to determine that it's failing - // because the stored procedure hasn't been created in the database? - if (identifierFromStoredProcedure == null) { - return null; - } - identifier = prepend + identifierFromStoredProcedure; - } while (!isGlobalIdUnique(new GlobalId(dvo.getProtocol(), dvo.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getProviderInformation().get(0)))); - - return identifier; - } - - /*This method is only used for DataFiles with DEPENDENT Pids. It is not for Datasets - * - */ - private String generateIdentifierFromStoredProcedureDependent(DataFile datafile, String prepend) { - String identifier; - Long retVal; - retVal = Long.valueOf(0L); - //ToDo - replace loops with one lookup for largest entry? (the do loop runs ~n**2/2 calls). The check for existingIdentifiers means this is mostly a local loop now, versus involving db or PidProvider calls, but still...) - - // This will catch identifiers already assigned in the current transaction (e.g. 
- // in FinalizeDatasetPublicationCommand) that haven't been committed to the db - // without having to make a call to the PIDProvider - Set existingIdentifiers = new HashSet(); - List files = datafile.getOwner().getFiles(); - for(DataFile f:files) { - existingIdentifiers.add(f.getIdentifier()); - } - - do { - retVal++; - identifier = prepend + retVal.toString(); - - } while (existingIdentifiers.contains(identifier) || !isGlobalIdUnique(new GlobalId(datafile.getProtocol(), datafile.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getProviderInformation().get(0)))); - - return identifier; - } - - - class GlobalIdMetadataTemplate { - - - private String template; - - public GlobalIdMetadataTemplate(){ - try (InputStream in = GlobalIdMetadataTemplate.class.getResourceAsStream("datacite_metadata_template.xml")) { - template = Util.readAndClose(in, "utf-8"); - } catch (Exception e) { - logger.log(Level.SEVERE, "datacite metadata template load error"); - logger.log(Level.SEVERE, "String " + e.toString()); - logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage()); - logger.log(Level.SEVERE, "cause " + e.getCause()); - logger.log(Level.SEVERE, "message " + e.getMessage()); - } - } - - private String xmlMetadata; - private String identifier; - private List datafileIdentifiers; - private List creators; - private String title; - private String publisher; - private String publisherYear; - private List authors; - private String description; - private List contacts; - private List producers; - - public List getProducers() { - return producers; - } - - public void setProducers(List producers) { - this.producers = producers; - } - - public List getContacts() { - return contacts; - } - - public void setContacts(List contacts) { - this.contacts = contacts; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - public List getAuthors() { - return authors; - } - - public void setAuthors(List authors) { - this.authors = authors; - } - - - public List getDatafileIdentifiers() { - return datafileIdentifiers; - } - - public void setDatafileIdentifiers(List datafileIdentifiers) { - this.datafileIdentifiers = datafileIdentifiers; - } - - public GlobalIdMetadataTemplate(String xmlMetaData) { - this.xmlMetadata = xmlMetaData; - Document doc = Jsoup.parseBodyFragment(xmlMetaData); - Elements identifierElements = doc.select("identifier"); - if (identifierElements.size() > 0) { - identifier = identifierElements.get(0).html(); - } - Elements creatorElements = doc.select("creatorName"); - creators = new ArrayList<>(); - for (Element creatorElement : creatorElements) { - creators.add(creatorElement.html()); - } - Elements titleElements = doc.select("title"); - if (titleElements.size() > 0) { - title = titleElements.get(0).html(); - } - Elements publisherElements = doc.select("publisher"); - if (publisherElements.size() > 0) { - publisher = publisherElements.get(0).html(); - } - Elements publisherYearElements = doc.select("publicationYear"); - if (publisherYearElements.size() > 0) { - publisherYear = publisherYearElements.get(0).html(); - } - } - - public String generateXML(DvObject dvObject) { - // Can't use "UNKNOWN" here because DataCite will respond with "[facet 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" - String publisherYearFinal = "9999"; - // FIXME: Investigate why this.publisherYear is sometimes null now that pull request #4606 has been 
merged. - if (this.publisherYear != null) { - // Added to prevent a NullPointerException when trying to destroy datasets when using DataCite rather than EZID. - publisherYearFinal = this.publisherYear; - } - xmlMetadata = template.replace("${identifier}", getIdentifier().trim()) - .replace("${title}", this.title) - .replace("${publisher}", this.publisher) - .replace("${publisherYear}", publisherYearFinal) - .replace("${description}", this.description); - StringBuilder creatorsElement = new StringBuilder(); - for (DatasetAuthor author : authors) { - creatorsElement.append(""); - creatorsElement.append(author.getName().getDisplayValue()); - creatorsElement.append(""); - - if (author.getIdType() != null && author.getIdValue() != null && !author.getIdType().isEmpty() && !author.getIdValue().isEmpty() && author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { - - if (author.getIdType().equals("ORCID")) { - creatorsElement.append("" + author.getIdValue() + ""); - } - if (author.getIdType().equals("ISNI")) { - creatorsElement.append("" + author.getIdValue() + ""); - } - if (author.getIdType().equals("LCNA")) { - creatorsElement.append("" + author.getIdValue() + ""); - } - } - if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { - creatorsElement.append("" + author.getAffiliation().getDisplayValue() + ""); - } - creatorsElement.append(""); - } - xmlMetadata = xmlMetadata.replace("${creators}", creatorsElement.toString()); - - StringBuilder contributorsElement = new StringBuilder(); - for (String[] contact : this.getContacts()) { - if (!contact[0].isEmpty()) { - contributorsElement.append("" + contact[0] + ""); - if (!contact[1].isEmpty()) { - contributorsElement.append("" + contact[1] + ""); - } - contributorsElement.append(""); - } - } - for (String[] producer : this.getProducers()) { - contributorsElement.append("" + producer[0] + ""); - if (!producer[1].isEmpty()) { - contributorsElement.append("" + producer[1] + ""); - } - contributorsElement.append(""); - } - - String relIdentifiers = generateRelatedIdentifiers(dvObject); - - xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); - - xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); - return xmlMetadata; - } - - private String generateRelatedIdentifiers(DvObject dvObject) { - - StringBuilder sb = new StringBuilder(); - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - - datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - if (!dataFile.getGlobalId().asString().isEmpty()) { - if (sb.toString().isEmpty()) { - sb.append(""); - } - sb.append("" + dataFile.getGlobalId() + ""); - } - } - - if (!sb.toString().isEmpty()) { - sb.append(""); - } - } - } else if (dvObject.isInstanceofDataFile()) { - DataFile df = (DataFile) dvObject; - sb.append(""); - sb.append("" + df.getOwner().getGlobalId() + ""); - sb.append(""); - } - return sb.toString(); - } - - public void generateFileIdentifiers(DvObject dvObject) { - - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - - datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - datafileIdentifiers.add(dataFile.getIdentifier()); - int x = 
xmlMetadata.indexOf("") - 1; - xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", dataFile.getIdentifier()); - xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" + template.substring(x, template.length() - 1); - - } - - } else { - xmlMetadata = xmlMetadata.replace("${relatedIdentifier}", ""); - } - } - } - - public String getTemplate() { - return template; - } - - public void setTemplate(String templateIn) { - template = templateIn; - } - - public String getIdentifier() { - return identifier; - } - - public void setIdentifier(String identifier) { - this.identifier = identifier; - } - - public List getCreators() { - return creators; - } - - public void setCreators(List creators) { - this.creators = creators; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getPublisher() { - return publisher; - } - - public void setPublisher(String publisher) { - this.publisher = publisher; - } - - public String getPublisherYear() { - return publisherYear; - } - - public void setPublisherYear(String publisherYear) { - this.publisherYear = publisherYear; - } -} - public String getMetadataFromDvObject(String identifier, Map metadata, DvObject dvObject) { - - Dataset dataset = null; - - if (dvObject instanceof Dataset) { - dataset = (Dataset) dvObject; - } else { - dataset = (Dataset) dvObject.getOwner(); - } - - GlobalIdMetadataTemplate metadataTemplate = new GlobalIdMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Util.getListFromStr(metadata.get("datacite.creator"))); - metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); - if (dvObject.isInstanceofDataset()) { - metadataTemplate.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); - } - if (dvObject.isInstanceofDataFile()) { - DataFile df = (DataFile) dvObject; - String fileDescription = df.getDescription(); - metadataTemplate.setDescription(fileDescription == null ? "" : fileDescription); - } - - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); - metadataTemplate.setTitle(dvObject.getCurrentName()); - String producerString = dataverseService.getRootDataverseName(); - if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE) ) { - producerString = UNAVAILABLE; - } - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); - - String xmlMetadata = metadataTemplate.generateXML(dvObject); - logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); - return xmlMetadata; - } - - @Override - public boolean canManagePID() { - //The default expectation is that PID providers are configured to manage some set (i.e. 
based on protocol/authority/shoulder) of PIDs - return true; - } - - @Override - public boolean isConfigured() { - if(configured==null) { - return false; - } else { - return configured.booleanValue(); - } - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java index 8c96f98ce39..363622ba3bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/AuxiliaryFileServiceBean.java @@ -2,6 +2,7 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -46,6 +47,8 @@ public class AuxiliaryFileServiceBean implements java.io.Serializable { @EJB private SystemConfig systemConfig; + @EJB + StorageUseServiceBean storageUseService; public AuxiliaryFile find(Object pk) { return em.find(AuxiliaryFile.class, pk); @@ -126,6 +129,13 @@ public AuxiliaryFile processAuxiliaryFile(InputStream fileInputStream, DataFile } dataFile.getAuxiliaryFiles().add(auxFile); } + // We've just added this file to storage; increment the StorageUse + // record if needed. + if (auxFile.getFileSize() != null + && auxFile.getFileSize() > 0 + && dataFile.getOwner() != null ) { + storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), auxFile.getFileSize()); + } } catch (IOException ioex) { logger.severe("IO Exception trying to save auxiliary file: " + ioex.getMessage()); throw new InternalServerErrorException(); @@ -181,6 +191,7 @@ public void deleteAuxiliaryFile(DataFile dataFile, String formatTag, String form if (af == null) { throw new FileNotFoundException(); } + Long auxFileSize = af.getFileSize(); em.remove(af); StorageIO storageIO; storageIO = dataFile.getStorageIO(); @@ -188,6 +199,14 @@ public void deleteAuxiliaryFile(DataFile dataFile, String formatTag, String form if (storageIO.isAuxObjectCached(auxExtension)) { storageIO.deleteAuxObject(auxExtension); } + // We've just deleted this file from storage; update the StorageUse + // record if needed. 
+ if (auxFileSize != null + && auxFileSize > 0 + && dataFile.getOwner() != null) { + storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), (0L - auxFileSize)); + } + } public List findAuxiliaryFiles(DataFile dataFile) { diff --git a/src/main/java/edu/harvard/iq/dataverse/BannerMessage.java b/src/main/java/edu/harvard/iq/dataverse/BannerMessage.java index 214e26965fa..003d1057972 100644 --- a/src/main/java/edu/harvard/iq/dataverse/BannerMessage.java +++ b/src/main/java/edu/harvard/iq/dataverse/BannerMessage.java @@ -46,7 +46,7 @@ public void setBannerMessageTexts(Collection bannerMessageTex public String getDisplayValue(){ - String retVal = ""; + String retVal = null; for (BannerMessageText msgTxt : this.getBannerMessageTexts()) { if (msgTxt.getLang().equals(BundleUtil.getCurrentLocale().getLanguage())) { retVal = msgTxt.getMessage(); diff --git a/src/main/java/edu/harvard/iq/dataverse/BannerMessageServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/BannerMessageServiceBean.java index 0e757998d58..3961bd064db 100644 --- a/src/main/java/edu/harvard/iq/dataverse/BannerMessageServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/BannerMessageServiceBean.java @@ -46,8 +46,10 @@ public List findAllBannerMessages() { public void save( BannerMessage message ) { em.persist(message); + em.flush(); } + public void deleteBannerMessage(Object pk) { BannerMessage message = em.find(BannerMessage.class, pk); diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterCache.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterCache.java deleted file mode 100644 index 7c75b1a4da6..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterCache.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
- */ -package edu.harvard.iq.dataverse; - - -import java.io.Serializable; -import jakarta.persistence.Column; -import jakarta.persistence.Entity; -import jakarta.persistence.GeneratedValue; -import jakarta.persistence.GenerationType; -import jakarta.persistence.Id; -import jakarta.persistence.Lob; -import jakarta.persistence.NamedQueries; -import jakarta.persistence.NamedQuery; -import org.hibernate.validator.constraints.NotBlank; - -/** - * - * @author luopc - */ -@NamedQueries( - @NamedQuery( name="DOIDataCiteRegisterCache.findByDoi", - query="SELECT d FROM DOIDataCiteRegisterCache d WHERE d.doi=:doi") -) -@Entity -public class DOIDataCiteRegisterCache implements Serializable{ - - private static final long serialVersionUID = 8030143094734315681L; - - @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) - private Long id; - - @NotBlank - @Column(unique=true) - private String doi; - - @NotBlank - private String url; - - @NotBlank - private String status; - - @NotBlank - @Lob - private String xml; - - public Long getId() { - return id; - } - - public void setId(Long id) { - this.id = id; - } - - public String getDoi() { - return doi; - } - - public void setDoi(String doi) { - this.doi = doi; - } - - public String getStatus() { - return status; - } - - public void setStatus(String status) { - this.status = status; - } - - public String getXml() { - return xml; - } - - public void setXml(String xml) { - this.xml = xml; - } - - public String getUrl() { - return url; - } - - public void setUrl(String url) { - this.url = url; - } -} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java deleted file mode 100644 index 9ecc4a3ecc9..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteRegisterService.java +++ /dev/null @@ -1,707 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package edu.harvard.iq.dataverse; - -import edu.harvard.iq.dataverse.branding.BrandingUtil; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.logging.Level; -import java.util.logging.Logger; -import jakarta.ejb.EJB; -import jakarta.ejb.Stateless; -import jakarta.persistence.EntityManager; -import jakarta.persistence.PersistenceContext; -import jakarta.persistence.TypedQuery; - -import edu.harvard.iq.dataverse.settings.JvmSettings; -import org.apache.commons.text.StringEscapeUtils; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; - -/** - * - * @author luopc - */ -@Stateless -public class DOIDataCiteRegisterService { - - private static final Logger logger = Logger.getLogger(DOIDataCiteRegisterService.class.getCanonicalName()); - - @PersistenceContext(unitName = "VDCNet-ejbPU") - private EntityManager em; - - @EJB - DataverseServiceBean dataverseService; - - @EJB - DOIDataCiteServiceBean doiDataCiteServiceBean; - - - //A singleton since it, and the httpClient in it can be reused. 
- private DataCiteRESTfullClient client=null; - - private DataCiteRESTfullClient getClient() throws IOException { - if (client == null) { - client = new DataCiteRESTfullClient( - JvmSettings.DATACITE_MDS_API_URL.lookup(), - JvmSettings.DATACITE_USERNAME.lookup(), - JvmSettings.DATACITE_PASSWORD.lookup() - ); - } - return client; - } - - /** - * This method is deprecated and unused. We switched away from this method - * when adjusting the code to reserve DOIs from DataCite on dataset create. - * - * Note that the DOIDataCiteRegisterCache entity/table used in this method - * might be a candidate for deprecation as well. Removing it would require - * some refactoring as it is used throughout the DataCite code. - */ - @Deprecated - public String createIdentifierLocal(String identifier, Map metadata, DvObject dvObject) { - - String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); - String status = metadata.get("_status").trim(); - String target = metadata.get("_target"); - String retString = ""; - DOIDataCiteRegisterCache rc = findByDOI(identifier); - if (rc == null) { - rc = new DOIDataCiteRegisterCache(); - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - rc.setStatus("reserved"); - rc.setUrl(target); - em.persist(rc); - } else { - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - rc.setStatus("reserved"); - rc.setUrl(target); - } - retString = "success to reserved " + identifier; - - return retString; - } - - /** - * This "reserveIdentifier" method is heavily based on the - * "registerIdentifier" method below, but this one doesn't - * register a URL; registering the URL is what causes the "state" of the DOI to transition from - * "draft" to "findable". Here are some DataCite docs on the matter: - * - * "DOIs can exist in three states: draft, registered, and findable. DOIs - * are in the draft state when metadata have been registered, and will - * transition to the findable state when registering a URL." -- - * https://support.datacite.org/docs/mds-api-guide#doi-states - */ - public String reserveIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { - String retString = ""; - String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); - DOIDataCiteRegisterCache rc = findByDOI(identifier); - String target = metadata.get("_target"); - if (rc != null) { - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - // DataCite uses the term "draft" instead of "reserved".
- rc.setStatus("reserved"); - if (target == null || target.trim().length() == 0) { - target = rc.getUrl(); - } else { - rc.setUrl(target); - } - } - - DataCiteRESTfullClient client = getClient(); - retString = client.postMetadata(xmlMetadata); - - return retString; - } - - public String registerIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { - String retString = ""; - String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); - DOIDataCiteRegisterCache rc = findByDOI(identifier); - String target = metadata.get("_target"); - if (rc != null) { - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - rc.setStatus("public"); - if (target == null || target.trim().length() == 0) { - target = rc.getUrl(); - } else { - rc.setUrl(target); - } - } - - DataCiteRESTfullClient client = getClient(); - retString = client.postMetadata(xmlMetadata); - client.postUrl(identifier.substring(identifier.indexOf(":") + 1), target); - - return retString; - } - - public String deactivateIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { - String retString = ""; - - String metadataString = getMetadataForDeactivateIdentifier(identifier, metadata, dvObject); - retString = client.postMetadata(metadataString); - retString = client.inactiveDataset(identifier.substring(identifier.indexOf(":") + 1)); - - return retString; - } - - public static String getMetadataFromDvObject(String identifier, Map metadata, DvObject dvObject) { - - Dataset dataset = null; - - if (dvObject instanceof Dataset) { - dataset = (Dataset) dvObject; - } else { - dataset = (Dataset) dvObject.getOwner(); - } - - DataCiteMetadataTemplate metadataTemplate = new DataCiteMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Util.getListFromStr(metadata.get("datacite.creator"))); - metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); - if (dvObject.isInstanceofDataset()) { - //While getDescriptionPlainText strips < and > from HTML, it leaves '&' (at least in some cases), so we need to xml escape as well - String description = StringEscapeUtils.escapeXml10(dataset.getLatestVersion().getDescriptionPlainText()); - if (description.isEmpty() || description.equals(DatasetField.NA_VALUE)) { - description = AbstractGlobalIdServiceBean.UNAVAILABLE; - } - metadataTemplate.setDescription(description); - } - if (dvObject.isInstanceofDataFile()) { - DataFile df = (DataFile) dvObject; - //Note: File metadata is not escaped like dataset metadata is, so adding an xml escape here. - //This could/should be removed if the datafile methods add escaping - String fileDescription = StringEscapeUtils.escapeXml10(df.getDescription()); - metadataTemplate.setDescription(fileDescription == null ? AbstractGlobalIdServiceBean.UNAVAILABLE : fileDescription); - String datasetPid = df.getOwner().getGlobalId().asString(); - metadataTemplate.setDatasetIdentifier(datasetPid); - } else { - metadataTemplate.setDatasetIdentifier(""); - } - - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); - String title = dvObject.getCurrentName(); - if(dvObject.isInstanceofDataFile()) { - //Note file title is not currently escaped the way the dataset title is, so adding it here.
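- // For example (illustrative, not original code): StringEscapeUtils.escapeXml10("AT&T <data>") returns "AT&amp;T &lt;data&gt;".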
- title = StringEscapeUtils.escapeXml10(title); - } - - if (title.isEmpty() || title.equals(DatasetField.NA_VALUE)) { - title = AbstractGlobalIdServiceBean.UNAVAILABLE; - } - - metadataTemplate.setTitle(title); - String producerString = BrandingUtil.getRootDataverseCollectionName(); - if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { - producerString = AbstractGlobalIdServiceBean.UNAVAILABLE; - } - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); - - String xmlMetadata = metadataTemplate.generateXML(dvObject); - logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); - return xmlMetadata; - } - - public static String getMetadataForDeactivateIdentifier(String identifier, Map metadata, DvObject dvObject) { - - DataCiteMetadataTemplate metadataTemplate = new DataCiteMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Util.getListFromStr(metadata.get("datacite.creator"))); - - metadataTemplate.setDescription(AbstractGlobalIdServiceBean.UNAVAILABLE); - - String title =metadata.get("datacite.title"); - - System.out.print("Map metadata title: "+ metadata.get("datacite.title")); - - metadataTemplate.setAuthors(null); - - metadataTemplate.setTitle(title); - String producerString = AbstractGlobalIdServiceBean.UNAVAILABLE; - - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); - - String xmlMetadata = metadataTemplate.generateXML(dvObject); - logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); - return xmlMetadata; - } - - public String modifyIdentifier(String identifier, HashMap metadata, DvObject dvObject) throws IOException { - - String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); - - logger.fine("XML to send to DataCite: " + xmlMetadata); - - String status = metadata.get("_status").trim(); - String target = metadata.get("_target"); - String retString = ""; - if (status.equals("reserved")) { - DOIDataCiteRegisterCache rc = findByDOI(identifier); - if (rc == null) { - rc = new DOIDataCiteRegisterCache(); - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - rc.setStatus("reserved"); - rc.setUrl(target); - em.persist(rc); - } else { - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - rc.setStatus("reserved"); - rc.setUrl(target); - } - retString = "success to reserved " + identifier; - } else if (status.equals("public")) { - DOIDataCiteRegisterCache rc = findByDOI(identifier); - if (rc != null) { - rc.setDoi(identifier); - rc.setXml(xmlMetadata); - rc.setStatus("public"); - if (target == null || target.trim().length() == 0) { - target = rc.getUrl(); - } else { - rc.setUrl(target); - } - try { - DataCiteRESTfullClient client = getClient(); - retString = client.postMetadata(xmlMetadata); - client.postUrl(identifier.substring(identifier.indexOf(":") + 1), target); - - } catch (UnsupportedEncodingException ex) { - logger.log(Level.SEVERE, null, ex); - - } catch (RuntimeException rte) { - logger.log(Level.SEVERE, "Error creating DOI at DataCite: {0}", rte.getMessage()); - logger.log(Level.SEVERE, "Exception", rte); - - } - } - } else if (status.equals("unavailable")) { - DOIDataCiteRegisterCache rc = findByDOI(identifier); - try { - DataCiteRESTfullClient client = getClient(); - if (rc != null) { - rc.setStatus("unavailable"); - retString = 
client.inactiveDataset(identifier.substring(identifier.indexOf(":") + 1)); - } - } catch (IOException io) { - - } - } - return retString; - } - - public boolean testDOIExists(String identifier) { - boolean doiExists; - try { - DataCiteRESTfullClient client = getClient(); - doiExists = client.testDOIExists(identifier.substring(identifier.indexOf(":") + 1)); - } catch (Exception e) { - logger.log(Level.INFO, identifier, e); - return false; - } - return doiExists; - } - - public HashMap getMetadata(String identifier) throws IOException { - HashMap metadata = new HashMap<>(); - try { - DataCiteRESTfullClient client = getClient(); - String xmlMetadata = client.getMetadata(identifier.substring(identifier.indexOf(":") + 1)); - DOIDataCiteServiceBean.GlobalIdMetadataTemplate template = doiDataCiteServiceBean.new GlobalIdMetadataTemplate(xmlMetadata); - metadata.put("datacite.creator", Util.getStrFromList(template.getCreators())); - metadata.put("datacite.title", template.getTitle()); - metadata.put("datacite.publisher", template.getPublisher()); - metadata.put("datacite.publicationyear", template.getPublisherYear()); - DOIDataCiteRegisterCache rc = findByDOI(identifier); - if (rc != null) { - metadata.put("_status", rc.getStatus()); - } else { - metadata.put("_status", "public"); - } - } catch (RuntimeException e) { - logger.log(Level.INFO, identifier, e); - } - return metadata; - } - - public DOIDataCiteRegisterCache findByDOI(String doi) { - TypedQuery query = em.createNamedQuery("DOIDataCiteRegisterCache.findByDoi", - DOIDataCiteRegisterCache.class); - query.setParameter("doi", doi); - List rc = query.getResultList(); - if (rc.size() == 1) { - return rc.get(0); - } - return null; - } - - public void deleteIdentifier(String identifier) { - DOIDataCiteRegisterCache rc = findByDOI(identifier); - if (rc != null) { - em.remove(rc); - } - } - -} - -class DataCiteMetadataTemplate { - - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.DataCiteMetadataTemplate"); - private static String template; - - static { - try (InputStream in = DataCiteMetadataTemplate.class.getResourceAsStream("datacite_metadata_template.xml")) { - template = Util.readAndClose(in, "utf-8"); - } catch (Exception e) { - logger.log(Level.SEVERE, "datacite metadata template load error"); - logger.log(Level.SEVERE, "String " + e.toString()); - logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage()); - logger.log(Level.SEVERE, "cause " + e.getCause()); - logger.log(Level.SEVERE, "message " + e.getMessage()); - } - } - - private String xmlMetadata; - private String identifier; - private String datasetIdentifier; - private List datafileIdentifiers; - private List creators; - private String title; - private String publisher; - private String publisherYear; - private List authors; - private String description; - private List contacts; - private List producers; - - public List getProducers() { - return producers; - } - - public void setProducers(List producers) { - this.producers = producers; - } - - public List getContacts() { - return contacts; - } - - public void setContacts(List contacts) { - this.contacts = contacts; - } - - public String getDescription() { - return description; - } - - public void setDescription(String description) { - this.description = description; - } - - public List getAuthors() { - return authors; - } - - public void setAuthors(List authors) { - this.authors = authors; - } - - public DataCiteMetadataTemplate() { - } - - public List getDatafileIdentifiers() { - return 
datafileIdentifiers; - } - - public void setDatafileIdentifiers(List datafileIdentifiers) { - this.datafileIdentifiers = datafileIdentifiers; - } - - public DataCiteMetadataTemplate(String xmlMetaData) { - this.xmlMetadata = xmlMetaData; - Document doc = Jsoup.parseBodyFragment(xmlMetaData); - Elements identifierElements = doc.select("identifier"); - if (identifierElements.size() > 0) { - identifier = identifierElements.get(0).html(); - } - Elements creatorElements = doc.select("creatorName"); - creators = new ArrayList<>(); - for (Element creatorElement : creatorElements) { - creators.add(creatorElement.html()); - } - Elements titleElements = doc.select("title"); - if (titleElements.size() > 0) { - title = titleElements.get(0).html(); - } - Elements publisherElements = doc.select("publisher"); - if (publisherElements.size() > 0) { - publisher = publisherElements.get(0).html(); - } - Elements publisherYearElements = doc.select("publicationYear"); - if (publisherYearElements.size() > 0) { - publisherYear = publisherYearElements.get(0).html(); - } - } - - public String generateXML(DvObject dvObject) { - // Can't use "UNKNOWN" here because DataCite will respond with "[facet 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" - String publisherYearFinal = "9999"; - // FIXME: Investigate why this.publisherYear is sometimes null now that pull request #4606 has been merged. - if (this.publisherYear != null) { - // Added to prevent a NullPointerException when trying to destroy datasets when using DataCite rather than EZID. - publisherYearFinal = this.publisherYear; - } - xmlMetadata = template.replace("${identifier}", this.identifier.trim()) - .replace("${title}", this.title) - .replace("${publisher}", this.publisher) - .replace("${publisherYear}", publisherYearFinal) - .replace("${description}", this.description); - - StringBuilder creatorsElement = new StringBuilder(); - if (authors!= null && !authors.isEmpty()) { - for (DatasetAuthor author : authors) { - creatorsElement.append("<creator><creatorName>"); - creatorsElement.append(author.getName().getDisplayValue()); - creatorsElement.append("</creatorName>"); - - if (author.getIdType() != null && author.getIdValue() != null && !author.getIdType().isEmpty() && !author.getIdValue().isEmpty() && author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { - - if (author.getIdType().equals("ORCID")) { - creatorsElement.append("<nameIdentifier schemeURI=\"https://orcid.org/\" nameIdentifierScheme=\"ORCID\">" + author.getIdValue() + "</nameIdentifier>"); - } - if (author.getIdType().equals("ISNI")) { - creatorsElement.append("<nameIdentifier schemeURI=\"http://isni.org/isni/\" nameIdentifierScheme=\"ISNI\">" + author.getIdValue() + "</nameIdentifier>"); - } - if (author.getIdType().equals("LCNA")) { - creatorsElement.append("<nameIdentifier schemeURI=\"http://id.loc.gov/authorities/names/\" nameIdentifierScheme=\"LCNA\">" + author.getIdValue() + "</nameIdentifier>"); - } - } - if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { - creatorsElement.append("<affiliation>" + author.getAffiliation().getDisplayValue() + "</affiliation>"); - } - creatorsElement.append("</creator>"); - } - - } else { - creatorsElement.append("<creator><creatorName>").append(AbstractGlobalIdServiceBean.UNAVAILABLE).append("</creatorName></creator>"); - } - - xmlMetadata = xmlMetadata.replace("${creators}", creatorsElement.toString()); - - StringBuilder contributorsElement = new StringBuilder(); - if (this.getContacts() != null) { - for (String[] contact : this.getContacts()) { - if (!contact[0].isEmpty()) { - contributorsElement.append("<contributor contributorType=\"ContactPerson\"><contributorName>" + contact[0] + "</contributorName>"); - if (!contact[1].isEmpty()) { - contributorsElement.append("<affiliation>" + contact[1] + "</affiliation>"); - } - contributorsElement.append("</contributor>"); - } - } - } - - if (this.getProducers() != null) { - for (String[] producer : this.getProducers()) { - contributorsElement.append("<contributor contributorType=\"Producer\"><contributorName>" + producer[0] + "</contributorName>"); - if (!producer[1].isEmpty()) { - contributorsElement.append("<affiliation>" + producer[1] + "</affiliation>"); - } - contributorsElement.append("</contributor>"); - } - } - - String relIdentifiers = generateRelatedIdentifiers(dvObject); - - xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); - - xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); - return xmlMetadata; - } - - private String generateRelatedIdentifiers(DvObject dvObject) { - - StringBuilder sb = new StringBuilder(); - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - - datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - if (dataFile.getGlobalId() != null) { - if (sb.toString().isEmpty()) { - sb.append("<relatedIdentifiers>"); - } - sb.append("<relatedIdentifier relatedIdentifierType=\"DOI\" relationType=\"HasPart\">" + dataFile.getGlobalId() + "</relatedIdentifier>"); - } - } - - if (!sb.toString().isEmpty()) { - sb.append("</relatedIdentifiers>"); - } - } - } else if (dvObject.isInstanceofDataFile()) { - DataFile df = (DataFile) dvObject; - sb.append("<relatedIdentifiers>"); - sb.append("<relatedIdentifier relatedIdentifierType=\"DOI\" relationType=\"IsPartOf\">" + df.getOwner().getGlobalId() + "</relatedIdentifier>"); - sb.append("</relatedIdentifiers>"); - } - return sb.toString(); - } - - public void generateFileIdentifiers(DvObject dvObject) { - - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - - datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - datafileIdentifiers.add(dataFile.getIdentifier()); - int x = xmlMetadata.indexOf("</relatedIdentifiers>") - 1; - xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", dataFile.getIdentifier()); - xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" + template.substring(x, template.length() - 1); - - } - - } else { - xmlMetadata = xmlMetadata.replace("${relatedIdentifier}", ""); - } - } - } - - public static String getTemplate() { - return template; - } - - public static void setTemplate(String template) { - DataCiteMetadataTemplate.template = template; - } - - public String getIdentifier() { - return identifier; - } - - public void setIdentifier(String identifier) { - this.identifier = identifier; - } - - public void setDatasetIdentifier(String datasetIdentifier) { - this.datasetIdentifier = datasetIdentifier; - } - - public List getCreators() { - return creators; - } - - public void setCreators(List creators) { - this.creators = creators; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getPublisher() { - return publisher; - } - - public void setPublisher(String publisher) { - this.publisher = publisher; - } - - public String getPublisherYear() { - return publisherYear; - } - - public void setPublisherYear(String publisherYear) { - this.publisherYear = publisherYear; - } -} - -class Util { - - public static void close(InputStream in) { - if (in != null) { - try { - in.close(); - } catch (IOException e) { - throw new RuntimeException("Fail to close InputStream"); - } - } - } - - public static String readAndClose(InputStream inStream, String encoding) { - ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - byte[] buf = new byte[128]; - String data; - try { - int cnt; - while ((cnt = inStream.read(buf)) >= 0) { - outStream.write(buf, 0, cnt); - } - data = outStream.toString(encoding); - } catch (IOException ioe) { - throw new
RuntimeException("IOException"); - } finally { - close(inStream); - } - return data; - } - - public static List getListFromStr(String str) { - return Arrays.asList(str.split("; ")); -// List authors = new ArrayList(); -// int preIdx = 0; -// for(int i=0;i authors) { - StringBuilder str = new StringBuilder(); - for (String author : authors) { - if (str.length() > 0) { - str.append("; "); - } - str.append(author); - } - return str.toString(); - } - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java deleted file mode 100644 index 48786b41824..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/DOIDataCiteServiceBean.java +++ /dev/null @@ -1,248 +0,0 @@ -package edu.harvard.iq.dataverse; - -import java.io.IOException; -import java.net.HttpURLConnection; -import java.net.URL; -import java.util.Base64; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.logging.Level; -import java.util.logging.Logger; - -import jakarta.ejb.EJB; -import jakarta.ejb.Stateless; - -import edu.harvard.iq.dataverse.settings.JvmSettings; -import org.apache.commons.httpclient.HttpException; -import org.apache.commons.httpclient.HttpStatus; - - -/** - * - * @author luopc - */ -@Stateless -public class DOIDataCiteServiceBean extends DOIServiceBean { - - private static final Logger logger = Logger.getLogger(DOIDataCiteServiceBean.class.getCanonicalName()); - - private static final String PUBLIC = "public"; - private static final String FINDABLE = "findable"; - private static final String RESERVED = "reserved"; - private static final String DRAFT = "draft"; - - @EJB - DOIDataCiteRegisterService doiDataCiteRegisterService; - - @Override - public boolean registerWhenPublished() { - return false; - } - - - - @Override - public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) { - logger.log(Level.FINE,"alreadyRegistered"); - if(pid==null || pid.asString().isEmpty()) { - logger.fine("No identifier sent."); - return false; - } - boolean alreadyRegistered; - String identifier = pid.asString(); - try{ - alreadyRegistered = doiDataCiteRegisterService.testDOIExists(identifier); - } catch (Exception e){ - logger.log(Level.WARNING, "alreadyRegistered failed"); - return false; - } - return alreadyRegistered; - } - - @Override - public String createIdentifier(DvObject dvObject) throws Exception { - logger.log(Level.FINE,"createIdentifier"); - if(dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty() ){ - dvObject = generateIdentifier(dvObject); - } - String identifier = getIdentifier(dvObject); - Map metadata = getMetadataForCreateIndicator(dvObject); - metadata.put("_status", "reserved"); - try { - String retString = doiDataCiteRegisterService.reserveIdentifier(identifier, metadata, dvObject); - logger.log(Level.FINE, "create DOI identifier retString : " + retString); - return retString; - } catch (Exception e) { - logger.log(Level.WARNING, "Identifier not created: create failed", e); - throw e; - } - } - - @Override - public Map getIdentifierMetadata(DvObject dvObject) { - logger.log(Level.FINE,"getIdentifierMetadata"); - String identifier = getIdentifier(dvObject); - Map metadata = new HashMap<>(); - try { - metadata = doiDataCiteRegisterService.getMetadata(identifier); - } catch (Exception e) { - logger.log(Level.WARNING, "getIdentifierMetadata failed", e); - } - return metadata; - } - - - /** - * Modifies the DOI metadata for a Dataset - * @param dvObject 
the dvObject whose metadata needs to be modified - * @return the Dataset identifier, or null if the modification failed - * @throws java.lang.Exception - */ - @Override - public String modifyIdentifierTargetURL(DvObject dvObject) throws Exception { - logger.log(Level.FINE,"modifyIdentifier"); - String identifier = getIdentifier(dvObject); - try { - HashMap metadata = doiDataCiteRegisterService.getMetadata(identifier); - doiDataCiteRegisterService.modifyIdentifier(identifier, metadata, dvObject); - } catch (Exception e) { - logger.log(Level.WARNING, "modifyMetadata failed", e); - throw e; - } - return identifier; - } - - public void deleteRecordFromCache(Dataset datasetIn){ - logger.log(Level.FINE,"deleteRecordFromCache"); - String identifier = getIdentifier(datasetIn); - HashMap doiMetadata = new HashMap(); - try { - doiMetadata = doiDataCiteRegisterService.getMetadata(identifier); - } catch (Exception e) { - logger.log(Level.WARNING, "get metadata failed, cannot delete"); - logger.log(Level.WARNING, "String {0}", e.toString()); - logger.log(Level.WARNING, "localized message {0}", e.getLocalizedMessage()); - logger.log(Level.WARNING, "cause", e.getCause()); - logger.log(Level.WARNING, "message {0}", e.getMessage()); - } - - String idStatus = (String) doiMetadata.get("_status"); - - if (idStatus == null || idStatus.equals("reserved")) { - logger.log(Level.WARNING, "Delete status is reserved..."); - try { - doiDataCiteRegisterService.deleteIdentifier(identifier); - } catch (Exception e) { - logger.log(Level.WARNING, "delete failed"); - logger.log(Level.WARNING, "String {0}", e.toString()); - logger.log(Level.WARNING, "localized message {0}", e.getLocalizedMessage()); - logger.log(Level.WARNING, "cause", e.getCause()); - logger.log(Level.WARNING, "message {0}", e.getMessage()); - throw new RuntimeException(e); - } - } - } - - /* - * Deletes a DOI if it is in DRAFT/RESERVED state or removes metadata and changes it from PUBLIC/FINDABLE to REGISTERED. - */ - @Override - public void deleteIdentifier(DvObject dvObject) throws IOException, HttpException { - logger.log(Level.FINE,"deleteIdentifier"); - String identifier = getIdentifier(dvObject); - //ToDo - PidUtils currently has a DataCite API call that would get the status at DataCite for this identifier - that could be more accurate than assuming based on whether the dvObject has been published - String idStatus = DRAFT; - if(dvObject.isReleased()) { - idStatus = PUBLIC; - } - if ( idStatus != null ) { - switch ( idStatus ) { - case RESERVED: - case DRAFT: - logger.log(Level.INFO, "Delete status is reserved..."); - //service only removes the identifier from the cache (since it was written before DOIs could be registered in draft state) - doiDataCiteRegisterService.deleteIdentifier(identifier); - //So we call the deleteDraftIdentifier method below until things are refactored - deleteDraftIdentifier(dvObject); - break; - - case PUBLIC: - case FINDABLE: - //if public then it has been released; set it to unavailable and reset the target to the n2t url - Map metadata = addDOIMetadataForDestroyedDataset(dvObject); - metadata.put("_status", "registered"); - metadata.put("_target", getTargetUrl(dvObject)); - doiDataCiteRegisterService.deactivateIdentifier(identifier, metadata, dvObject); - break; - } - } - } - - /** - * Deletes the DOI from the DataCite side, if possible. Only "draft" DOIs can be - * deleted.
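- * - * Illustrative request (the DOI is hypothetical; the base URL comes from the JVM setting looked up in the method body): DELETE {baseUrl}/dois/10.5072/FK2EXAMPLE with Basic auth credentials; per the comment below, 204 means the draft DOI was deleted and 405 means it was not deletable (e.g. it is already findable).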
- */ - private void deleteDraftIdentifier(DvObject dvObject) throws IOException { - - //ToDo - incorporate into DataCiteRESTfulClient - String baseUrl = JvmSettings.DATACITE_REST_API_URL.lookup(); - String username = JvmSettings.DATACITE_USERNAME.lookup(); - String password = JvmSettings.DATACITE_PASSWORD.lookup(); - GlobalId doi = dvObject.getGlobalId(); - /** - * Deletes the DOI from DataCite if it can. Returns 204 if the PID was deleted - * (only possible for "draft" DOIs), 405 (method not allowed) if the DOI - * wasn't deleted (because it's in "findable" state, for example), 404 if the - * DOI wasn't found, and possibly other status codes such as 500 if DataCite - * is down. - */ - - URL url = new URL(baseUrl + "/dois/" + doi.getAuthority() + "/" + doi.getIdentifier()); - HttpURLConnection connection = null; - connection = (HttpURLConnection) url.openConnection(); - connection.setRequestMethod("DELETE"); - String userpass = username + ":" + password; - String basicAuth = "Basic " + new String(Base64.getEncoder().encode(userpass.getBytes())); - connection.setRequestProperty("Authorization", basicAuth); - int status = connection.getResponseCode(); - if(status!=HttpStatus.SC_NO_CONTENT) { - logger.warning("Incorrect Response Status from DataCite: " + status + " : " + connection.getResponseMessage()); - throw new HttpException("Status: " + status); - } - logger.fine("deleteDoi status for " + doi.asString() + ": " + status); - } - - @Override - public boolean publicizeIdentifier(DvObject dvObject) { - logger.log(Level.FINE,"updateIdentifierStatus"); - if(dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty() ){ - dvObject = generateIdentifier(dvObject); - } - String identifier = getIdentifier(dvObject); - Map metadata = getUpdateMetadata(dvObject); - metadata.put("_status", PUBLIC); - metadata.put("datacite.publicationyear", generateYear(dvObject)); - metadata.put("_target", getTargetUrl(dvObject)); - try { - doiDataCiteRegisterService.registerIdentifier(identifier, metadata, dvObject); - return true; - } catch (Exception e) { - logger.log(Level.WARNING, "modifyMetadata failed: " + e.getMessage(), e); - return false; - } - } - - - @Override - public List getProviderInformation(){ - return List.of("DataCite", "https://status.datacite.org"); - } - - - - @Override - protected String getProviderKeyName() { - return "DataCite"; - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java deleted file mode 100644 index 0182c745cd0..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/DOIServiceBean.java +++ /dev/null @@ -1,78 +0,0 @@ -package edu.harvard.iq.dataverse; - -import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; - -public abstract class DOIServiceBean extends AbstractGlobalIdServiceBean { - - public static final String DOI_PROTOCOL = "doi"; - public static final String DOI_RESOLVER_URL = "https://doi.org/"; - public static final String HTTP_DOI_RESOLVER_URL = "http://doi.org/"; - public static final String DXDOI_RESOLVER_URL = "https://dx.doi.org/"; - public static final String HTTP_DXDOI_RESOLVER_URL = "http://dx.doi.org/"; - - public DOIServiceBean() { - super(); - } - - @Override - public GlobalId parsePersistentId(String pidString) { - if (pidString.startsWith(DOI_RESOLVER_URL)) { - pidString = pidString.replace(DOI_RESOLVER_URL, - (DOI_PROTOCOL + ":")); - } else if (pidString.startsWith(HTTP_DOI_RESOLVER_URL)) { - pidString = pidString.replace(HTTP_DOI_RESOLVER_URL, -
(DOI_PROTOCOL + ":")); - } else if (pidString.startsWith(DXDOI_RESOLVER_URL)) { - pidString = pidString.replace(DXDOI_RESOLVER_URL, - (DOI_PROTOCOL + ":")); - } - return super.parsePersistentId(pidString); - } - - @Override - public GlobalId parsePersistentId(String protocol, String identifierString) { - - if (!DOI_PROTOCOL.equals(protocol)) { - return null; - } - GlobalId globalId = super.parsePersistentId(protocol, identifierString); - if (globalId!=null && !GlobalIdServiceBean.checkDOIAuthority(globalId.getAuthority())) { - return null; - } - return globalId; - } - - @Override - public GlobalId parsePersistentId(String protocol, String authority, String identifier) { - - if (!DOI_PROTOCOL.equals(protocol)) { - return null; - } - return super.parsePersistentId(protocol, authority, identifier); - } - - public String getUrlPrefix() { - return DOI_RESOLVER_URL; - } - - @Override - public boolean isConfigured() { - if (configured == null) { - if (getProviderKeyName() == null) { - configured = false; - } else { - String doiProvider = settingsService.getValueForKey(Key.DoiProvider, ""); - if (getProviderKeyName().equals(doiProvider)) { - configured = true; - } else if (!doiProvider.isEmpty()) { - configured = false; - } - } - } - return super.isConfigured(); - } - - protected String getProviderKeyName() { - return null; - } -} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java index 9b4b89db44f..a012175deae 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java @@ -7,6 +7,7 @@ import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; import java.io.BufferedWriter; import java.io.ByteArrayOutputStream; @@ -635,12 +636,12 @@ public Map getDataCiteMetadata() { String authorString = getAuthorsString(); if (authorString.isEmpty()) { - authorString = AbstractGlobalIdServiceBean.UNAVAILABLE; + authorString = AbstractPidProvider.UNAVAILABLE; } String producerString = getPublisher(); if (producerString.isEmpty()) { - producerString = AbstractGlobalIdServiceBean.UNAVAILABLE; + producerString = AbstractPidProvider.UNAVAILABLE; } metadata.put("datacite.creator", authorString); diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 407282a5372..29a4a14c021 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -242,6 +242,18 @@ public void setEmbargo(Embargo embargo) { this.embargo = embargo; } + @ManyToOne + @JoinColumn(name="retention_id") + private Retention retention; + + public Retention getRetention() { + return retention; + } + + public void setRetention(Retention retention) { + this.retention = retention; + } + public DataFile() { this.fileMetadatas = new ArrayList<>(); initFileReplaceAttributes(); @@ -545,61 +557,61 @@ public void setDescription(String description) { fmd.setDescription(description); } } + + public FileMetadata getDraftFileMetadata() { + FileMetadata latestFileMetadata = getLatestFileMetadata(); + if (latestFileMetadata.getDatasetVersion().isDraft()) { + return latestFileMetadata; + } + return null; + } public FileMetadata getFileMetadata() { return getLatestFileMetadata(); } - + public FileMetadata getLatestFileMetadata() { - 
FileMetadata fmd = null; + FileMetadata resultFileMetadata = null; - // for newly added or harvested, just return the one fmd if (fileMetadatas.size() == 1) { return fileMetadatas.get(0); } - + for (FileMetadata fileMetadata : fileMetadatas) { - // if it finds a draft, return it if (fileMetadata.getDatasetVersion().getVersionState().equals(VersionState.DRAFT)) { return fileMetadata; - } - - // otherwise return the one with the latest version number - // duplicate logic in getLatestPublishedFileMetadata() - if (fmd == null || fileMetadata.getDatasetVersion().getVersionNumber().compareTo( fmd.getDatasetVersion().getVersionNumber() ) > 0 ) { - fmd = fileMetadata; - } else if ((fileMetadata.getDatasetVersion().getVersionNumber().compareTo( fmd.getDatasetVersion().getVersionNumber())==0 )&& - ( fileMetadata.getDatasetVersion().getMinorVersionNumber().compareTo( fmd.getDatasetVersion().getMinorVersionNumber()) > 0 ) ) { - fmd = fileMetadata; } + resultFileMetadata = getTheNewerFileMetadata(resultFileMetadata, fileMetadata); } - return fmd; + + return resultFileMetadata; } - -// //Returns null if no published version + public FileMetadata getLatestPublishedFileMetadata() throws UnsupportedOperationException { - FileMetadata fmd = null; - - for (FileMetadata fileMetadata : fileMetadatas) { - // if it finds a draft, skip - if (fileMetadata.getDatasetVersion().getVersionState().equals(VersionState.DRAFT)) { - continue; - } - - // otherwise return the one with the latest version number - // duplicate logic in getLatestFileMetadata() - if (fmd == null || fileMetadata.getDatasetVersion().getVersionNumber().compareTo( fmd.getDatasetVersion().getVersionNumber() ) > 0 ) { - fmd = fileMetadata; - } else if ((fileMetadata.getDatasetVersion().getVersionNumber().compareTo( fmd.getDatasetVersion().getVersionNumber())==0 )&& - ( fileMetadata.getDatasetVersion().getMinorVersionNumber().compareTo( fmd.getDatasetVersion().getMinorVersionNumber()) > 0 ) ) { - fmd = fileMetadata; - } - } - if(fmd == null) { + FileMetadata resultFileMetadata = fileMetadatas.stream() + .filter(metadata -> !metadata.getDatasetVersion().getVersionState().equals(VersionState.DRAFT)) + .reduce(null, DataFile::getTheNewerFileMetadata); + + if (resultFileMetadata == null) { throw new UnsupportedOperationException("No published metadata version for DataFile " + this.getId()); } - return fmd; + return resultFileMetadata; + } + + public static FileMetadata getTheNewerFileMetadata(FileMetadata current, FileMetadata candidate) { + if (current == null) { + return candidate; + } + + DatasetVersion currentVersion = current.getDatasetVersion(); + DatasetVersion candidateVersion = candidate.getDatasetVersion(); + + if (DatasetVersion.compareByVersion.compare(candidateVersion, currentVersion) > 0) { + return candidate; + } + + return current; } /** @@ -610,7 +622,7 @@ public long getFilesize() { if (this.filesize == null) { // -1 means "unknown" return -1; - } + } return this.filesize; } @@ -640,7 +652,7 @@ public String getFriendlySize() { return BundleUtil.getStringFromBundle("file.sizeNotAvailable"); } } - + public boolean isRestricted() { return restricted; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java index 332a39912d2..21f925f8981 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse; +import 
edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; @@ -8,6 +9,9 @@ import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.search.SolrSearchResult; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; +import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; import edu.harvard.iq.dataverse.util.FileSortFieldAndOrder; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -41,8 +45,6 @@ * * @author Leonid Andreev * - * Basic skeleton of the new DataFile service for DVN 4.0 - * */ @Stateless @@ -66,6 +68,9 @@ public class DataFileServiceBean implements java.io.Serializable { @EJB SystemConfig systemConfig; + @EJB + StorageUseServiceBean storageUseService; + @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; @@ -139,39 +144,6 @@ public class DataFileServiceBean implements java.io.Serializable { */ public static final String MIME_TYPE_PACKAGE_FILE = "application/vnd.dataverse.file-package"; - public class UserStorageQuota { - private Long totalAllocatedInBytes = 0L; - private Long totalUsageInBytes = 0L; - - public UserStorageQuota(Long allocated, Long used) { - this.totalAllocatedInBytes = allocated; - this.totalUsageInBytes = used; - } - - public Long getTotalAllocatedInBytes() { - return totalAllocatedInBytes; - } - - public void setTotalAllocatedInBytes(Long totalAllocatedInBytes) { - this.totalAllocatedInBytes = totalAllocatedInBytes; - } - - public Long getTotalUsageInBytes() { - return totalUsageInBytes; - } - - public void setTotalUsageInBytes(Long totalUsageInBytes) { - this.totalUsageInBytes = totalUsageInBytes; - } - - public Long getRemainingQuotaInBytes() { - if (totalUsageInBytes > totalAllocatedInBytes) { - return 0L; - } - return totalAllocatedInBytes - totalUsageInBytes; - } - } - public DataFile find(Object pk) { return em.find(DataFile.class, pk); } @@ -412,7 +384,8 @@ public FileMetadata findMostRecentVersionFileIsIn(DataFile file) { if (fileMetadatas == null || fileMetadatas.isEmpty()) { return null; } else { - return fileMetadatas.get(0); + // This assumes the order of filemetadatas is from first to most recent, which is true as of v6.3 + return fileMetadatas.get(fileMetadatas.size() - 1); } } @@ -788,6 +761,13 @@ public List findAll() { return em.createQuery("select object(o) from DataFile as o order by o.id", DataFile.class).getResultList(); } + public List findVersionStates(Long fileId) { + Query query = em.createQuery( + "select distinct dv.versionState from DatasetVersion dv where dv.id in (select fm.datasetVersion.id from FileMetadata fm where fm.dataFile.id=:fileId)"); + query.setParameter("fileId", fileId); + return query.getResultList(); + } + public DataFile save(DataFile dataFile) { if (dataFile.isMergeable()) { @@ -965,7 +945,7 @@ public boolean isThumbnailAvailable (DataFile file) { } // If thumbnails are not even supported for this class of files, - // there's notthing to talk about: + // there's nothing to talk about: if (!FileUtil.isThumbnailSupported(file)) { return false; } @@ -980,16 +960,17 @@ public boolean isThumbnailAvailable (DataFile file) { is more important... 
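(Note: the updated logic below also records a failed attempt via setPreviewImageFail(true), presumably so that later calls can skip re-running the expensive ImageThumbConverter check for files that have already failed once.)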
*/ - - if (ImageThumbConverter.isThumbnailAvailable(file)) { - file = this.find(file.getId()); - file.setPreviewImageAvailable(true); - this.save(file); - return true; - } - - return false; + file = this.find(file.getId()); + if (ImageThumbConverter.isThumbnailAvailable(file)) { + file.setPreviewImageAvailable(true); + this.save(file); + return true; + } + file.setPreviewImageFail(true); + file.setPreviewImageAvailable(false); + this.save(file); + return false; } @@ -1271,9 +1252,8 @@ public List selectFilesWithMissingOriginalSizes() { * Check that a identifier entered by the user is unique (not currently used * for any other study in this Dataverse Network). Also check for duplicate * in the remote PID service if needed - * @param userIdentifier - * @param datafile - * @param idServiceBean + * @param datafileId + * @param storageLocation * @return {@code true} iff the global identifier is unique. */ public void finalizeFileDelete(Long dataFileId, String storageLocation) throws IOException { @@ -1395,29 +1375,42 @@ public Embargo findEmbargo(Long id) { DataFile d = find(id); return d.getEmbargo(); } - - public Long getStorageUsageByCreator(AuthenticatedUser user) { - Query query = em.createQuery("SELECT SUM(o.filesize) FROM DataFile o WHERE o.creator.id=:creatorId"); - - try { - Long totalSize = (Long)query.setParameter("creatorId", user.getId()).getSingleResult(); - logger.info("total size for user: "+totalSize); - return totalSize == null ? 0L : totalSize; - } catch (NoResultException nre) { // ? - logger.info("NoResultException, returning 0L"); - return 0L; - } + + public boolean isRetentionExpired(FileMetadata fm) { + return FileUtil.isRetentionExpired(fm); } - - public UserStorageQuota getUserStorageQuota(AuthenticatedUser user, Dataset dataset) { - // this is for testing only - one pre-set, installation-wide quota limit - // for everybody: - Long totalAllocated = systemConfig.getTestStorageQuotaLimit(); - // again, this is for testing only - we are only counting the total size - // of all the files created by this user; it will likely be a much more - // complex calculation in real life applications: - Long totalUsed = getStorageUsageByCreator(user); + /** + * Checks if the supplied DvObjectContainer (Dataset or Collection; although + * only collection-level storage quotas are officially supported as of now) + * has a quota configured, and if not, keeps checking if any of the direct + * ancestor Collections further up have a configured quota. If it finds one, + * it will retrieve the current total content size for that specific ancestor + * dvObjectContainer and use it to define the quota limit for the upload + * session in progress. 
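+ * + * For example (hypothetical numbers): if the nearest ancestor collection with a quota allows 10,000,000 bytes and its recorded storage use is 7,500,000 bytes, the returned UploadSessionQuotaLimit is created with that 10,000,000-byte allocation and 7,500,000 bytes of current use, leaving 2,500,000 bytes of headroom for the session.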
+ * + * @param parent - DvObjectContainer, Dataset or Collection + * @return upload session size limit spec, or null if quota not defined on + * any of the ancestor DvObjectContainers + */ + public UploadSessionQuotaLimit getUploadSessionQuotaLimit(DvObjectContainer parent) { + DvObjectContainer testDvContainer = parent; + StorageQuota quota = testDvContainer.getStorageQuota(); + while (quota == null && testDvContainer.getOwner() != null) { + testDvContainer = testDvContainer.getOwner(); + quota = testDvContainer.getStorageQuota(); + if (quota != null) { + break; + } + } + if (quota == null || quota.getAllocation() == null) { + return null; + } + + // Note that we are checking the recorded storage use not on the + // immediate parent necessarily, but on the specific ancestor + // DvObjectContainer on which the storage quota is defined: + Long currentSize = storageUseService.findStorageSizeByDvContainerId(testDvContainer.getId()); - return new UserStorageQuota(totalAllocated, totalUsed); + return new UploadSessionQuotaLimit(quota.getAllocation(), currentSize); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataTable.java b/src/main/java/edu/harvard/iq/dataverse/DataTable.java index a17d8c65138..95f3aed0f40 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataTable.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataTable.java @@ -112,6 +112,16 @@ public DataTable() { @Column( nullable = true ) private String originalFileName; + + /** + * The physical tab-delimited file is in storage with the list of variable + * names saved as the 1st line. This means that we do not need to generate + * this line on the fly. (Also means that direct download mechanism can be + * used for this file!) + */ + @Column(nullable = false) + private boolean storedWithVariableHeader = false; + /* * Getter and Setter methods: */ @@ -206,6 +216,14 @@ public void setOriginalFileName(String originalFileName) { this.originalFileName = originalFileName; } + public boolean isStoredWithVariableHeader() { + return storedWithVariableHeader; + } + + public void setStoredWithVariableHeader(boolean storedWithVariableHeader) { + this.storedWithVariableHeader = storedWithVariableHeader; + } + /* * Custom overrides for hashCode(), equals() and toString() methods: */ diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 245bdf0efd2..eaf406d01bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -35,6 +35,7 @@ import jakarta.persistence.TemporalType; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.storageuse.StorageUse; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -189,6 +190,10 @@ public void setTemplate(Template template) { } public Dataset() { + this(false); + } + + public Dataset(boolean isHarvested) { DatasetVersion datasetVersion = new DatasetVersion(); datasetVersion.setDataset(this); datasetVersion.setVersionState(DatasetVersion.VersionState.DRAFT); @@ -196,6 +201,11 @@ public Dataset() { datasetVersion.setVersionNumber((long) 1); datasetVersion.setMinorVersionNumber((long) 0); versions.add(datasetVersion); + + if (!isHarvested) { + StorageUse storageUse = new StorageUse(this); + this.setStorageUse(storageUse); + } } /** @@ -307,6 +317,7 @@ public boolean isDeaccessioned() { } return hasDeaccessionedVersions; // since any published version would have already returned } + 
public DatasetVersion getLatestVersion() { return getVersions().get(0); @@ -842,6 +853,23 @@ public String getRemoteArchiveURL() { if (StringUtil.nonEmpty(this.getProtocol()) && StringUtil.nonEmpty(this.getAuthority()) && StringUtil.nonEmpty(this.getIdentifier())) { + + // If there is a custom archival url for this Harvesting + // Source, we'll use that + String harvestingUrl = this.getHarvestedFrom().getHarvestingUrl(); + String archivalUrl = this.getHarvestedFrom().getArchiveUrl(); + if (!harvestingUrl.contains(archivalUrl)) { + // When a Harvesting Client is created, the “archive url” is set to + // just the host part of the OAI url automatically. + // For example, if the OAI url was "https://remote.edu/oai", + // the archive url will default to "https://remote.edu/". + // If this is no longer true, we know it means the admin + // went to the trouble of setting it to something else - + // so we should use this url for the redirects back to source, + // instead of the global id resolver. + return archivalUrl + this.getAuthority() + "/" + this.getIdentifier(); + } + // ... if not, we'll redirect to the resolver for the global id: return this.getPersistentURL(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetField.java b/src/main/java/edu/harvard/iq/dataverse/DatasetField.java index c836a20893f..31e7758c7d5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetField.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetField.java @@ -595,7 +595,8 @@ public boolean removeBlankDatasetFieldValues() { return true; } } else { // controlled vocab - if (this.getControlledVocabularyValues().isEmpty()) { + // during harvesting some CVV are put in getDatasetFieldValues. we don't want to remove those + if (this.getControlledVocabularyValues().isEmpty() && this.getDatasetFieldValues().isEmpty()) { return true; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index 1621b80df55..d91aa101eb5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -91,8 +91,9 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String datasetVersionValue="datasetVersionValue"; public final static String versionDate="versionDate"; public final static String keywordValue="keywordValue"; - public final static String keywordVocab="keywordVocabulary"; //SEK 6/10/2016 to match what is in the db - public final static String keywordVocabURI="keywordVocabularyURI"; //SEK 6/10/2016 to match what is in the db + public final static String keywordTermURI="keywordTermURI"; + public final static String keywordVocab="keywordVocabulary"; + public final static String keywordVocabURI="keywordVocabularyURI"; public final static String topicClassValue="topicClassValue"; public final static String topicClassVocab="topicClassVocab"; public final static String topicClassVocabURI="topicClassVocabURI"; @@ -112,8 +113,8 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String geographicUnit="geographicUnit"; public final static String westLongitude="westLongitude"; public final static String eastLongitude="eastLongitude"; - public final static String northLatitude="northLongitude"; //Changed to match DB - incorrectly entered into DB: https://github.com/IQSS/dataverse/issues/5645 - public final static String southLatitude="southLongitude"; 
//Incorrect in DB: https://github.com/IQSS/dataverse/issues/5645 + public final static String northLatitude="northLatitude"; + public final static String southLatitude="southLatitude"; public final static String unitOfAnalysis="unitOfAnalysis"; public final static String universe="universe"; public final static String kindOfData="kindOfData"; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index ce2b00086ec..34595728fa7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -4,7 +4,9 @@ import java.io.StringReader; import java.net.URI; import java.net.URISyntaxException; +import java.net.URLEncoder; import java.nio.charset.StandardCharsets; +import java.security.InvalidParameterException; import java.sql.Timestamp; import java.text.MessageFormat; import java.time.Instant; @@ -34,10 +36,12 @@ import jakarta.persistence.NoResultException; import jakarta.persistence.NonUniqueResultException; import jakarta.persistence.PersistenceContext; +import jakarta.persistence.PersistenceException; import jakarta.persistence.TypedQuery; import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.httpclient.HttpException; +import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpResponse; import org.apache.http.HttpResponseInterceptor; import org.apache.http.client.methods.HttpGet; @@ -46,7 +50,6 @@ import org.apache.http.impl.client.HttpClients; import org.apache.http.protocol.HttpContext; import org.apache.http.util.EntityUtils; - import edu.harvard.iq.dataverse.settings.SettingsServiceBean; /** @@ -319,14 +322,15 @@ public Map getCVocConf(boolean byTermUriField){ + jo.getString("term-uri-field")); } } - if (jo.containsKey("child-fields")) { - JsonArray childFields = jo.getJsonArray("child-fields"); - for (JsonString elm : childFields.getValuesAs(JsonString.class)) { - dft = findByNameOpt(elm.getString()); - logger.info("Found: " + dft.getName()); + if (jo.containsKey("managed-fields")) { + JsonObject managedFields = jo.getJsonObject("managed-fields"); + for (String s : managedFields.keySet()) { + dft = findByNameOpt(managedFields.getString(s)); if (dft == null) { logger.warning("Ignoring External Vocabulary setting for non-existent child field: " - + elm.getString()); + + managedFields.getString(s)); + } else { + logger.fine("Found: " + dft.getName()); } } } @@ -343,12 +347,16 @@ public Map getCVocConf(boolean byTermUriField){ * @param df - the primitive/parent compound field containing a newly saved value */ public void registerExternalVocabValues(DatasetField df) { - DatasetFieldType dft =df.getDatasetFieldType(); + DatasetFieldType dft = df.getDatasetFieldType(); logger.fine("Registering for field: " + dft.getName()); JsonObject cvocEntry = getCVocConf(true).get(dft.getId()); if (dft.isPrimitive()) { + List siblingsDatasetFields = new ArrayList<>(); + if(dft.getParentDatasetFieldType()!=null) { + siblingsDatasetFields = df.getParentDatasetFieldCompoundValue().getChildDatasetFields(); + } for (DatasetFieldValue dfv : df.getDatasetFieldValues()) { - registerExternalTerm(cvocEntry, dfv.getValue()); + registerExternalTerm(cvocEntry, dfv.getValue(), siblingsDatasetFields); } } else { if (df.getDatasetFieldType().isCompound()) { @@ -357,45 +365,55 @@ public void registerExternalVocabValues(DatasetField df) { for (DatasetField cdf : cv.getChildDatasetFields()) { 
logger.fine("Found term uri field type id: " + cdf.getDatasetFieldType().getId()); if (cdf.getDatasetFieldType().equals(termdft)) { - registerExternalTerm(cvocEntry, cdf.getValue()); + registerExternalTerm(cvocEntry, cdf.getValue(), cv.getChildDatasetFields()); } } } } } } - + /** - * Retrieves indexable strings from a cached externalvocabularyvalue entry. - * - * This method assumes externalvocabularyvalue entries have been filtered and - * the externalvocabularyvalue entry contain a single JsonObject whose "personName" or "termName" values - * are either Strings or an array of objects with "lang" and ("value" or "content") keys. The - * string, or the "value/content"s for each language are added to the set. - * + * Retrieves indexable strings from a cached externalvocabularyvalue entry filtered through retrieval-filtering configuration. + *

+ * This method assumes externalvocabularyvalue entries have been filtered and that they contain a single JsonObject. + * Cases handled: a String, an Array of Strings, an Array of Objects with "value" or "content" keys, and an Object with one or more entries whose values are Strings or Arrays of Strings. + * The string(s), or the per-language "value"/"content" entries, are added to the set. + * Retrieved string values are indexed in the term-uri-field (a parameter defined in the CVOC configuration) by default, or in the field specified by an optional "indexIn" parameter in the retrieval-filtering section of the CVOC configuration. + *
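+ * For illustration only (hypothetical payloads, matching the shapes handled below): given {"termName": [{"lang": "en", "value": "non-apis bee"}, {"lang": "fr", "value": "abeille non apis"}]} the strings "non-apis bee" and "abeille non apis" are added; given {"termName": {"en": ["neighbourhood societies"]}}, "neighbourhood societies" is added. + *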

* Any parsing error results in no entries (there can be unfiltered entries with * unknown structure - getting some strings from such an entry could give fairly * random info that would be bad to add for searches, etc.) - * - * @param termUri + * + * @param termUri unique identifier to search in database + * @param cvocEntry related cvoc configuration + * @param indexingField name of the solr field that will be filled by this method while indexing * @return - a set of indexable strings */ - public Set getStringsFor(String termUri) { - Set strings = new HashSet(); + public Set getIndexableStringsByTermUri(String termUri, JsonObject cvocEntry, String indexingField) { + Set strings = new HashSet<>(); JsonObject jo = getExternalVocabularyValue(termUri); + JsonObject filtering = cvocEntry.getJsonObject("retrieval-filtering"); + String termUriField = cvocEntry.getJsonString("term-uri-field").getString(); if (jo != null) { try { for (String key : jo.keySet()) { - if (key.equals("termName") || key.equals("personName")) { + String indexIn = filtering.getJsonObject(key).getString("indexIn", null); + // Either we are in mapping mode, so indexingField (the solr field) equals indexIn (from the cvoc config), + // or we are in default mode: indexingField is termUriField and indexIn is not defined, in which case only the termName and personName keys are used + if (indexingField.equals(indexIn) || + (indexIn == null && termUriField.equals(indexingField) && (key.equals("termName") || key.equals("personName")))) { JsonValue jv = jo.get(key); if (jv.getValueType().equals(JsonValue.ValueType.STRING)) { logger.fine("adding " + jo.getString(key) + " for " + termUri); strings.add(jo.getString(key)); - } else { - if (jv.getValueType().equals(JsonValue.ValueType.ARRAY)) { - JsonArray jarr = jv.asJsonArray(); - for (int i = 0; i < jarr.size(); i++) { + } else if (jv.getValueType().equals(JsonValue.ValueType.ARRAY)) { + JsonArray jarr = jv.asJsonArray(); + for (int i = 0; i < jarr.size(); i++) { + if (jarr.get(i).getValueType().equals(JsonValue.ValueType.STRING)) { + strings.add(jarr.getString(i)); + } else if (jarr.get(i).getValueType().equals(ValueType.OBJECT)) { // This condition handles SKOSMOS format like [{"lang": "en","value": "non-apis bee"},{"lang": "fr","value": "abeille non apis"}] JsonObject entry = jarr.getJsonObject(i); if (entry.containsKey("value")) { logger.fine("adding " + entry.getString("value") + " for " + termUri); @@ -407,6 +425,22 @@ public Set getStringsFor(String termUri) { } } } + } else if (jv.getValueType().equals(JsonValue.ValueType.OBJECT)) { + JsonObject joo = jv.asJsonObject(); + for (Map.Entry entry : joo.entrySet()) { + if (entry.getValue().getValueType().equals(JsonValue.ValueType.STRING)) { // This condition handles format like { "fr": "association de quartier", "en": "neighborhood associations"} + logger.fine("adding " + joo.getString(entry.getKey()) + " for " + termUri); + strings.add(joo.getString(entry.getKey())); + } else if (entry.getValue().getValueType().equals(ValueType.ARRAY)) { // This condition handles format like {"en": ["neighbourhood societies"]} + JsonArray jarr = entry.getValue().asJsonArray(); + for (int i = 0; i < jarr.size(); i++) { + if (jarr.get(i).getValueType().equals(JsonValue.ValueType.STRING)) { + logger.fine("adding " + jarr.getString(i) + " for " + termUri); + strings.add(jarr.getString(i)); + } + } + } + } } } } @@ -418,7 +452,7 @@ public Set getStringsFor(String termUri) { } logger.fine("Returning " + String.join(",", strings) + " for " + termUri); return strings; - } + } /** *
/** * Perform a query to retrieve a cached value from the externalvocabularyvalue table @@ -445,16 +479,20 @@ public JsonObject getExternalVocabularyValue(String termUri) { /** * Perform a call to the external service to retrieve information about the term URI - @param cvocEntry - the configuration for the DatasetFieldType associated with this term - @param term - the term uri as a string + * + * @param cvocEntry - the configuration for the DatasetFieldType associated with this term + * @param term - the term uri as a string + * @param relatedDatasetFields - siblings or children of the term */ - public void registerExternalTerm(JsonObject cvocEntry, String term) { + public void registerExternalTerm(JsonObject cvocEntry, String term, List relatedDatasetFields) { String retrievalUri = cvocEntry.getString("retrieval-uri"); + String termUriFieldName = cvocEntry.getString("term-uri-field"); String prefix = cvocEntry.getString("prefix", null); - if(term.isBlank()) { - logger.fine("Ingoring blank term"); + if(StringUtils.isBlank(term)) { + logger.fine("Ignoring blank term"); return; } + boolean isExternal = false; JsonObject vocabs = cvocEntry.getJsonObject("vocabs"); for (String key: vocabs.keySet()) { @@ -483,7 +521,22 @@ public void registerExternalTerm(JsonObject cvocEntry, String term) { } if (evv.getValue() == null) { String adjustedTerm = (prefix==null)? term: term.replace(prefix, ""); - retrievalUri = retrievalUri.replace("{0}", adjustedTerm); + + try { + retrievalUri = tryToReplaceRetrievalUriParam(retrievalUri, "0", adjustedTerm); + retrievalUri = tryToReplaceRetrievalUriParam(retrievalUri, termUriFieldName, adjustedTerm); + for (DatasetField f : relatedDatasetFields) { + retrievalUri = tryToReplaceRetrievalUriParam(retrievalUri, f.getDatasetFieldType().getName(), f.getValue()); + } + } catch (InvalidParameterException e) { + logger.warning("InvalidParameterException in tryToReplaceRetrievalUriParam : " + e.getMessage()); + return; + } + if (retrievalUri.contains("{")) { + logger.severe("Retrieval URI still contains unreplaced parameter: " + retrievalUri); + return; + } + logger.fine("Didn't find " + term + ", calling " + retrievalUri); try (CloseableHttpClient httpClient = HttpClients.custom() .addInterceptorLast(new HttpResponseInterceptor() { @@ -502,14 +555,21 @@ public void process(HttpResponse response, HttpContext context) throws HttpExcep HttpGet httpGet = new HttpGet(retrievalUri); //application/json+ld is for backward compatibility httpGet.addHeader("Accept", "application/ld+json, application/json+ld, application/json"); - + // Add any custom HTTP request headers from the CVOC configuration, if present + final JsonObject headers = cvocEntry.getJsonObject("headers"); + if (headers != null) { + final Set headerKeys = headers.keySet(); + for (final String hKey: headerKeys) { + httpGet.addHeader(hKey, headers.getString(hKey)); + } + } HttpResponse response = httpClient.execute(httpGet); String data = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); int statusCode = response.getStatusLine().getStatusCode(); if (statusCode == 200) { logger.fine("Returned data: " + data); try (JsonReader jsonReader = Json.createReader(new StringReader(data))) { - String dataObj =filterResponse(cvocEntry, jsonReader.readObject(), term).toString(); + String dataObj = filterResponse(cvocEntry, jsonReader.readObject(), term).toString(); evv.setValue(dataObj); evv.setLastUpdateDate(Timestamp.from(Instant.now())); logger.fine("JsonObject: " + dataObj);
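The substitution logic used above supports three placeholder spellings in a retrieval-uri: the legacy {0}, the term-uri-field name, and the names of related dataset fields, each optionally written as {encodeUrl:name} to URL-encode the substituted value. A minimal standalone sketch of that per-parameter rule (the service URL and term are invented for illustration):

```java
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;

public class RetrievalUriParamDemo {
    // Mirrors the per-parameter replacement rule of tryToReplaceRetrievalUriParam above.
    static String replaceParam(String uri, String name, String value) {
        if (uri.contains("{encodeUrl:" + name + "}")) {
            return uri.replace("{encodeUrl:" + name + "}",
                    URLEncoder.encode(value, StandardCharsets.UTF_8));
        }
        return uri.replace("{" + name + "}", value);
    }

    public static void main(String[] args) {
        // Hypothetical retrieval-uri from a CVOC configuration entry:
        String uri = "https://vocab.example.org/lookup?uri={encodeUrl:0}";
        System.out.println(replaceParam(uri, "0", "https://example.org/term/42"));
        // -> https://vocab.example.org/lookup?uri=https%3A%2F%2Fexample.org%2Fterm%2F42
    }
}
```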
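The new optional "headers" object is copied verbatim onto the outgoing request, which is what makes it possible to call terminology services that sit behind authentication. A hedged sketch of that copy loop in isolation (header names and values here are placeholders, not a documented configuration):

```java
import jakarta.json.Json;
import jakarta.json.JsonObject;
import java.util.Arrays;
import org.apache.http.client.methods.HttpGet;

public class CvocHeadersDemo {
    public static void main(String[] args) {
        // Stand-in for cvocEntry.getJsonObject("headers"):
        JsonObject headers = Json.createObjectBuilder()
                .add("Authorization", "Bearer changeme")
                .add("Accept-Language", "en")
                .build();
        HttpGet httpGet = new HttpGet("https://vocab.example.org/lookup");
        // Same copy loop as in registerExternalTerm above:
        for (String hKey : headers.keySet()) {
            httpGet.addHeader(hKey, headers.getString(hKey));
        }
        System.out.println(Arrays.toString(httpGet.getAllHeaders()));
    }
}
```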
@@ -518,6 +578,8 @@ public void process(HttpResponse response, HttpContext context) throws HttpExcep logger.fine("Wrote value for term: " + term); } catch (JsonException je) { logger.severe("Error retrieving: " + retrievalUri + " : " + je.getMessage()); + } catch (PersistenceException e) { + logger.fine("Problem persisting: " + retrievalUri + " : " + e.getMessage()); } } else { logger.severe("Received response code : " + statusCode + " when retrieving " + retrievalUri @@ -526,19 +588,42 @@ public void process(HttpResponse response, HttpContext context) throws HttpExcep } catch (IOException ioe) { logger.severe("IOException when retrieving url: " + retrievalUri + " : " + ioe.getMessage()); - } } catch (URISyntaxException e) { logger.fine("Term is not a URI: " + term); } + } + + private String tryToReplaceRetrievalUriParam(String retrievalUri, String paramName, String value) throws InvalidParameterException { + + if(StringUtils.isBlank(paramName)) { + throw new InvalidParameterException("Empty or null paramName is not allowed while replacing retrieval uri parameter"); + } + + if(retrievalUri.contains(paramName)) { + logger.fine("Parameter {" + paramName + "} found in retrievalUri"); + + if(StringUtils.isBlank(value)) { + throw new InvalidParameterException("Empty or null value is not allowed while replacing retrieval uri parameter"); + } + + if(retrievalUri.contains("encodeUrl:" + paramName)) { + retrievalUri = retrievalUri.replace("{encodeUrl:"+paramName+"}", URLEncoder.encode(value, StandardCharsets.UTF_8)); + } else { + retrievalUri = retrievalUri.replace("{"+paramName+"}", value); + } + } else { + logger.fine("Parameter {" + paramName + "} not found in retrievalUri"); + } + return retrievalUri; } /** * Parse the raw value returned by an external service for a given term uri and * filter it according to the 'retrieval-filtering' configuration for this * DatasetFieldType, creating a Json value with the specified structure - + * + * @param cvocEntry - the config for this DatasetFieldType + * @param readObject - the raw response from the service + * @param termUri - the term uri @@ -597,6 +682,8 @@ private JsonObject filterResponse(JsonObject cvocEntry, JsonObject readObject, S if (pattern.equals("{0}")) { if (vals.get(0) instanceof JsonArray) { job.add(filterKey, (JsonArray) vals.get(0)); + } else if (vals.get(0) instanceof JsonObject) { + job.add(filterKey, (JsonObject) vals.get(0)); } else { job.add(filterKey, (String) vals.get(0)); } @@ -634,7 +721,7 @@ Object processPathSegment(int index, String[] pathParts, JsonValue curPath, Stri String[] keyVal = pathParts[index].split("="); logger.fine("Looking for object where " + keyVal[0] + " is " + keyVal[1]); String expected = keyVal[1]; - + if (!expected.equals("*")) { if (expected.equals("@id")) { expected = termUri; @@ -663,7 +750,7 @@ Object processPathSegment(int index, String[] pathParts, JsonValue curPath, Stri } return parts.build(); } - + } else { curPath = ((JsonObject) curPath).get(pathParts[index]); logger.fine("Found next Path object " + curPath.toString()); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java index 824b486a42d..01785359e0e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldType.java @@ -284,7 +284,7 @@ public void setDisplayOnCreate(boolean displayOnCreate) { } public boolean isControlledVocabulary() { - return controlledVocabularyValues != null && !controlledVocabularyValues.isEmpty(); + return 
allowControlledVocabulary; } /** diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java index b6c21014f04..610bb70ff49 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldValueValidator.java @@ -8,9 +8,7 @@ import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Date; -import java.util.GregorianCalendar; +import java.util.*; import java.util.logging.Logger; import java.util.regex.Pattern; import jakarta.validation.ConstraintValidator; @@ -34,7 +32,6 @@ public void initialize(ValidateDatasetFieldType constraintAnnotation) { } public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext context) { - context.disableDefaultConstraintViolation(); // we do this so we can have different messages depending on the different issue boolean lengthOnly = false; @@ -55,6 +52,38 @@ public boolean isValid(DatasetFieldValue value, ConstraintValidatorContext conte return true; } + // verify no junk in individual fields and values are within range + if (dsfType.getName() != null && (dsfType.getName().equals(DatasetFieldConstant.northLatitude) || dsfType.getName().equals(DatasetFieldConstant.southLatitude) || + dsfType.getName().equals(DatasetFieldConstant.westLongitude) || dsfType.getName().equals(DatasetFieldConstant.eastLongitude))) { + try { + verifyBoundingBoxCoordinatesWithinRange(dsfType.getName(), value.getValue()); + } catch (IllegalArgumentException iae) { + try { + context.buildConstraintViolationWithTemplate(dsfType.getDisplayName() + " " + BundleUtil.getStringFromBundle("dataset.metadata.invalidEntry")).addConstraintViolation(); + } catch (NullPointerException e) { + } + return false; + } + } + + // validate fields that are siblings and depend on each others values + if (value.getDatasetField().getParentDatasetFieldCompoundValue() != null && + value.getDatasetField().getParentDatasetFieldCompoundValue().getParentDatasetField().getValidationMessage() == null) { + Optional failureMessage = validateChildConstraints(value.getDatasetField()); + if (failureMessage.isPresent()) { + try { + context.buildConstraintViolationWithTemplate(dsfType.getParentDatasetFieldType().getDisplayName() + " " + + BundleUtil.getStringFromBundle(failureMessage.get()) ).addConstraintViolation(); + + // save the failure message in the parent so we don't keep validating the children + value.getDatasetField().getParentDatasetFieldCompoundValue().getParentDatasetField().setValidationMessage(failureMessage.get()); + + } catch (NullPointerException npe) { + } + return false; + } + } + if (fieldType.equals(FieldType.TEXT) && !lengthOnly && value.getDatasetField().getDatasetFieldType().getValidationFormat() != null) { boolean valid = value.getValue().matches(value.getDatasetField().getDatasetFieldType().getValidationFormat()); if (!valid) { @@ -216,4 +245,60 @@ public boolean isValidAuthorIdentifier(String userInput, Pattern pattern) { return pattern.matcher(userInput).matches(); } + // Validate child fields against each other and return failure message or Optional.empty() if success + public Optional validateChildConstraints(DatasetField dsf) { + final String fieldName = dsf.getDatasetFieldType().getName() != null ? 
dsf.getDatasetFieldType().getName() : ""; + Optional returnFailureMessage = Optional.empty(); + + // Validate Child Constraint for Geospatial Bounding Box + // validate the four points of the box to ensure proper layout + if (fieldName.equals(DatasetFieldConstant.northLatitude) || fieldName.equals(DatasetFieldConstant.westLongitude) + || fieldName.equals(DatasetFieldConstant.eastLongitude) || fieldName.equals(DatasetFieldConstant.southLatitude)) { + final String failureMessage = "dataset.metadata.invalidGeospatialCoordinates"; + + try { + final Map coords = new HashMap<>(); + dsf.getParentDatasetFieldCompoundValue().getChildDatasetFields().forEach(f -> { + coords.put(f.getDatasetFieldType().getName(), f.getValue()); + }); + if (!validateBoundingBox(coords.get(DatasetFieldConstant.westLongitude), + coords.get(DatasetFieldConstant.eastLongitude), + coords.get(DatasetFieldConstant.northLatitude), + coords.get(DatasetFieldConstant.southLatitude))) { + returnFailureMessage = Optional.of(failureMessage); + } + } catch (IllegalArgumentException e) { // also catches NumberFormatException, a subclass of IllegalArgumentException + returnFailureMessage = Optional.of(failureMessage); + } + } + + return returnFailureMessage; + } + + public static boolean validateBoundingBox(final String westLon, final String eastLon, final String northLat, final String southLat) { + boolean returnVal = false; + + try { + Float west = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.westLongitude, westLon); + Float east = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.eastLongitude, eastLon); + Float north = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.northLatitude, northLat); + Float south = verifyBoundingBoxCoordinatesWithinRange(DatasetFieldConstant.southLatitude, southLat); + returnVal = west <= east && south <= north; + } catch (IllegalArgumentException e) { + returnVal = false; + } + + return returnVal; + } + + private static Float verifyBoundingBoxCoordinatesWithinRange(final String name, final String value) throws IllegalArgumentException { + int max = name.equals(DatasetFieldConstant.westLongitude) || name.equals(DatasetFieldConstant.eastLongitude) ? 180 : 90; + int min = max * -1; + + final Float returnVal = value != null ? Float.parseFloat(value) : Float.NaN; + if (returnVal.isNaN() || returnVal < min || returnVal > max) { + throw new IllegalArgumentException(String.format("Value (%s) not in range (%s-%s)", returnVal.isNaN() ? "missing" : returnVal, min, max)); + } + return returnVal; + } }
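For reference, the rules enforced here: longitudes must fall within [-180, 180], latitudes within [-90, 90], and the box is accepted only when west <= east and south <= north. A short usage sketch with invented coordinates (argument order is west, east, north, south):

```java
public class BoundingBoxDemo {
    public static void main(String[] args) {
        // Valid box: -73.9 <= -71.0 and 41.2 <= 42.5, all values in range.
        System.out.println(edu.harvard.iq.dataverse.DatasetFieldValueValidator
                .validateBoundingBox("-73.9", "-71.0", "42.5", "41.2")); // true
        // Invalid: 181 is outside the [-180, 180] longitude range.
        System.out.println(edu.harvard.iq.dataverse.DatasetFieldValueValidator
                .validateBoundingBox("181", "10", "20", "5")); // false
    }
}
```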
"missing" : returnVal, min, max)); + } + return returnVal; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetKeyword.java b/src/main/java/edu/harvard/iq/dataverse/DatasetKeyword.java deleted file mode 100644 index 747e3c068f1..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetKeyword.java +++ /dev/null @@ -1,68 +0,0 @@ -package edu.harvard.iq.dataverse; - -/** - * - * @author skraffmiller - */ - -public class DatasetKeyword { - - private int displayOrder; - public int getDisplayOrder() { - return this.displayOrder; - } - public void setDisplayOrder(int displayOrder) { - this.displayOrder = displayOrder; - } - - private DatasetField value; - public DatasetField getValue() { - return this.value; - } - public void setValue(DatasetField value) { - this.value = value; - } - - private DatasetVersion datasetVersion; - public DatasetVersion getDatasetVersion() { - return datasetVersion; - } - public void setDatasetVersion(DatasetVersion metadata) { - this.datasetVersion = metadata; - } - /* - @Version - private Long version; - public Long getVersion() { - return this.version; - } - public void setVersion(Long version) { - this.version = version; - } */ - - private DatasetField vocab; - public DatasetField getVocab() { - return this.vocab; - } - public void setVocab(DatasetField vocab) { - this.vocab = vocab; - } - - private DatasetField vocabURI; - public DatasetField getVocabURI() { - return this.vocabURI; - } - public void setVocabURI(DatasetField vocabURI) { - this.vocabURI = vocabURI; - } - - - public boolean isEmpty() { - /*return ((value==null || value.getValue().trim().equals("")) - && (vocab==null || vocab.getValue().trim().equals("")) - && (vocabURI==null || vocabURI.getValue().trim().equals("")));*/ - return false; - } - - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index fc18257196d..eae4a9f2977 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -8,9 +8,12 @@ import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; +import edu.harvard.iq.dataverse.authorization.users.GuestUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.SwiftAccessIO; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; @@ -22,6 +25,7 @@ import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.impl.CheckRateLimitForDatasetPageCommand; import edu.harvard.iq.dataverse.engine.command.impl.CreatePrivateUrlCommand; import edu.harvard.iq.dataverse.engine.command.impl.CuratePublishedDatasetVersionCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeaccessionDatasetVersionCommand; @@ -34,13 +38,17 @@ import edu.harvard.iq.dataverse.engine.command.impl.PublishDataverseCommand; import 
edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.export.ExportService; +import edu.harvard.iq.dataverse.util.cache.CacheFactoryBean; import io.gdcc.spi.export.ExportException; import io.gdcc.spi.export.Exporter; import edu.harvard.iq.dataverse.ingest.IngestRequest; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.metadataimport.ForeignMetadataImportServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DataCiteDOIProvider; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrlUtil; @@ -70,6 +78,7 @@ import java.lang.reflect.Method; import java.sql.Timestamp; import java.text.SimpleDateFormat; +import java.time.LocalDate; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; @@ -130,6 +139,7 @@ import jakarta.faces.event.AjaxBehaviorEvent; import jakarta.servlet.ServletOutputStream; import jakarta.servlet.http.HttpServletResponse; +import jakarta.servlet.http.HttpServletRequest; import org.apache.commons.text.StringEscapeUtils; import org.apache.commons.lang3.mutable.MutableBoolean; @@ -237,12 +247,16 @@ public enum DisplayMode { SolrClientService solrClientService; @EJB DvObjectServiceBean dvObjectService; + @EJB + CacheFactoryBean cacheFactory; @Inject DataverseRequestServiceBean dvRequestService; @Inject DatasetVersionUI datasetVersionUI; @Inject PermissionsWrapper permissionsWrapper; + @Inject + NavigationWrapper navigationWrapper; @Inject FileDownloadHelper fileDownloadHelper; @Inject @@ -258,6 +272,8 @@ public enum DisplayMode { @Inject EmbargoServiceBean embargoService; @Inject + RetentionServiceBean retentionService; + @Inject LicenseServiceBean licenseServiceBean; @Inject DataFileCategoryServiceBean dataFileCategoryService; @@ -361,6 +377,8 @@ public void setSelectedHostDataverse(Dataverse selectedHostDataverse) { * other boolean. 
*/ private boolean versionHasTabular = false; + private boolean versionHasGlobus = false; + private boolean globusTransferRequested = false; private boolean showIngestSuccess; @@ -506,7 +524,7 @@ public String getThumbnailString() { thumbnailString = datasetThumbnail.getBase64image(); } else { - thumbnailString = thumbnailServiceWrapper.getDatasetCardImageAsBase64Url(dataset, + thumbnailString = thumbnailServiceWrapper.getDatasetCardImageAsUrl(dataset, workingVersion.getId(), !workingVersion.isDraft(), ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE); @@ -701,6 +719,16 @@ public void setNumberOfFilesToShow(Long numberOfFilesToShow) { this.numberOfFilesToShow = numberOfFilesToShow; } + private String returnReason = ""; + + public String getReturnReason() { + return returnReason; + } + + public void setReturnReason(String returnReason) { + this.returnReason = returnReason; + } + public void showAll(){ setNumberOfFilesToShow(new Long(fileMetadatasSearch.size())); } @@ -754,17 +782,54 @@ public boolean isIndexedVersion() { if (isIndexedVersion != null) { return isIndexedVersion; } + + // Just like on the collection page, facets on the Dataset page can be + // disabled instance-wide by an admin: + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacets, false)) { + return isIndexedVersion = false; + } + + // plus we have mechanisms for disabling the facets selectively, just for + // the guests, or anonymous users: + if (session.getUser() instanceof GuestUser) { + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacetsForGuestUsers, false)) { + return isIndexedVersion = false; + } + + // An even lower grade of user than Guest is a truly anonymous user - + // a guest user who came without the session cookie: + Map cookies = FacesContext.getCurrentInstance().getExternalContext().getRequestCookieMap(); + if (!(cookies != null && cookies.containsKey("JSESSIONID"))) { + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacetsWithoutJsession, false)) { + return isIndexedVersion = false; + } + } + + } + + // The version is SUPPOSED to be indexed if it's the latest published version, or a - // draft. So if none of the above is true, we return false right away: - + // draft. So if none of the above is true, we can return false right away. if (!(workingVersion.isDraft() || isThisLatestReleasedVersion())) { return isIndexedVersion = false; } - - // ... 
but if it is the latest published version or a draft, we want to test - // and confirm that this version *has* actually been indexed and is searchable - // (and that solr is actually up and running!), by running a quick solr search: - return isIndexedVersion = isThisVersionSearchable(); + // If this is the latest published version, we want to confirm that this + // version was successfully indexed after the last publication + if (isThisLatestReleasedVersion()) { + if (workingVersion.getDataset().getIndexTime() == null) { + return isIndexedVersion = false; + } + // We add 3 hours to the indexed time to prevent false negatives + // when indexed time gets overwritten in finalizing the publication step + // by a value before the release time + final long duration = 3 * 60 * 60 * 1000; + final Timestamp movedIndexTime = new Timestamp(workingVersion.getDataset().getIndexTime().getTime() + duration); + return isIndexedVersion = movedIndexTime.after(workingVersion.getReleaseTime()); + } + + // Drafts don't have the indextime stamps set/incremented when indexed, + // so we'll just assume it is indexed, and will then hope for the best. + return isIndexedVersion = true; } /** @@ -820,8 +885,18 @@ public List getFileTagsFacetLabels() { /** * Verifies that solr is running and that the version is indexed and searchable * @return boolean - */ + * Commenting out this method for now, since we have decided it was not + * necessary to query solr just to figure out if we can query solr. We will + * rely solely on the latest-released status and the indexed timestamp from + * the database for that. - L.A. + * public boolean isThisVersionSearchable() { + // Just like on the collection page, facets on the Dataset page can be + // disabled instance-wide by an admin: + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacets, false)) { + return false; + } + SolrQuery solrQuery = new SolrQuery(); solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, workingVersion.getDataset().getId().toString())); @@ -856,6 +931,7 @@ public boolean isThisVersionSearchable() { return false; } + */ /** * Finds the list of numeric datafile ids in the Version specified, by running @@ -967,10 +1043,19 @@ public Set getFileIdsInVersionFromSolr(Long datasetVersionId, String patte logger.fine("Remote Solr Exception: " + ex.getLocalizedMessage()); String msg = ex.getLocalizedMessage(); if (msg.contains(SearchFields.FILE_DELETED)) { + // This is a backward compatibility hook put in place many versions + // ago, to accommodate instances running Solr with schemas that + // don't include this flag yet. Running Solr with an up-to-date + // schema has been a hard requirement for a while now; should we + // remove it at this point? - L.A. 
fileDeletedFlagNotIndexed = true; + } else { + isIndexedVersion = false; + return resultIds; } } catch (Exception ex) { logger.warning("Solr exception: " + ex.getLocalizedMessage()); + isIndexedVersion = false; return resultIds; } @@ -983,6 +1068,7 @@ public Set getFileIdsInVersionFromSolr(Long datasetVersionId, String patte queryResponse = solrClientService.getSolrClient().query(solrQuery); } catch (Exception ex) { logger.warning("Caught a Solr exception (again!): " + ex.getLocalizedMessage()); + isIndexedVersion = false; return resultIds; } } @@ -1172,8 +1258,17 @@ public boolean canDownloadFiles() { canDownloadFiles = false; for (FileMetadata fmd : workingVersion.getFileMetadatas()) { if (fileDownloadHelper.canDownloadFile(fmd)) { - canDownloadFiles = true; - break; + if (isVersionHasGlobus()) { + String driverId = DataAccess + .getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + if (StorageIO.isDataverseAccessible(driverId)) { + canDownloadFiles = true; + break; + } + } else { + canDownloadFiles = true; + break; + } } } } @@ -1880,15 +1975,15 @@ private void setIdByPersistentId() { } private String init(boolean initFull) { - + // Check for rate limit exceeded. Must be done before anything else to prevent unnecessary processing. + if (!cacheFactory.checkRate(session.getUser(), new CheckRateLimitForDatasetPageCommand(null,null))) { + return navigationWrapper.tooManyRequests(); + } //System.out.println("_YE_OLDE_QUERY_COUNTER_"); // for debug purposes setDataverseSiteUrl(systemConfig.getDataverseSiteUrl()); guestbookResponse = new GuestbookResponse(); - String nonNullDefaultIfKeyNotFound = ""; - protocol = settingsWrapper.getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - authority = settingsWrapper.getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); String sortOrder = getSortOrder(); if(sortOrder != null) { FileMetadata.setCategorySortOrder(sortOrder); @@ -1988,7 +2083,7 @@ private String init(boolean initFull) { // to the local 404 page, below. logger.warning("failed to issue a redirect to "+originalSourceURL); } - return originalSourceURL; + return null; } return permissionsWrapper.notFound(); @@ -2070,8 +2165,6 @@ private String init(boolean initFull) { editMode = EditMode.CREATE; selectedHostDataverse = dataverseService.find(ownerId); dataset.setOwner(selectedHostDataverse); - dataset.setProtocol(protocol); - dataset.setAuthority(authority); if (dataset.getOwner() == null) { return permissionsWrapper.notFound(); @@ -2081,9 +2174,9 @@ private String init(boolean initFull) { //Wait until the create command before actually getting an identifier, except if we're using directUpload //Need to assign an identifier prior to calls to requestDirectUploadUrl if direct upload is used. 
if ( isEmpty(dataset.getIdentifier()) && systemConfig.directUploadEnabled(dataset) ) { - CommandContext ctxt = commandEngine.getContext(); - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); - dataset.setIdentifier(idServiceBean.generateDatasetIdentifier(dataset)); + CommandContext ctxt = commandEngine.getContext(); + PidProvider pidProvider = ctxt.dvObjects().getEffectivePidGenerator(dataset); + pidProvider.generatePid(dataset); } dataverseTemplates.addAll(dataverseService.find(ownerId).getTemplates()); if (!dataverseService.find(ownerId).isTemplateRoot()) { @@ -2143,6 +2236,11 @@ private String init(boolean initFull) { } } + LocalDate minRetentiondate = settingsWrapper.getMinRetentionDate(); + if (minRetentiondate != null){ + selectionRetention.setDateUnavailable(minRetentiondate.plusDays(1L)); + } + displayLockInfo(dataset); displayPublishMessage(); @@ -2150,10 +2248,19 @@ private String init(boolean initFull) { // the total "originals" size of the dataset with direct custom queries; // then we'll be able to drop the lookup hint for DataTable from the // findDeep() method for the version and further speed up the lookup - // a little bit. + // a little bit. + boolean globusDownloadEnabled = systemConfig.isGlobusDownload(); for (FileMetadata fmd : workingVersion.getFileMetadatas()) { - if (fmd.getDataFile().isTabularData()) { + DataFile df = fmd.getDataFile(); + if (df.isTabularData()) { versionHasTabular = true; + } + if(globusDownloadEnabled) { + if(GlobusAccessibleStore.isGlobusAccessible(DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()))) { + versionHasGlobus= true; + } + } + if(versionHasTabular &&(!globusDownloadEnabled || versionHasGlobus)) { break; } } @@ -2210,13 +2317,11 @@ private void displayPublishMessage(){ public boolean isValid() { if (valid == null) { - DatasetVersion version = dataset.getLatestVersion(); - if (!version.isDraft()) { + if (workingVersion.isDraft() || (canUpdateDataset() && JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(true))) { + valid = workingVersion.isValid(); + } else { valid = true; } - DatasetVersion newVersion = version.cloneDatasetVersion(); - newVersion.setDatasetFields(newVersion.initDatasetFields()); - valid = newVersion.isValid(); } return valid; } @@ -2279,14 +2384,17 @@ private void displayLockInfo(Dataset dataset) { lockedDueToIngestVar = true; } - // With DataCite, we try to reserve the DOI when the dataset is created. Sometimes this - // fails because DataCite is down. We show the message below to set expectations that the - // "Publish" button won't work until the DOI has been reserved using the "Reserve PID" API. - if (settingsWrapper.isDataCiteInstallation() && dataset.getGlobalIdCreateTime() == null && editMode != EditMode.CREATE) { - JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("dataset.locked.pidNotReserved.message"), - BundleUtil.getStringFromBundle("dataset.locked.pidNotReserved.message.details")); + if (dataset.getGlobalIdCreateTime() == null && editMode != EditMode.CREATE) { + // With DataCite, we try to reserve the DOI when the dataset is created. Sometimes this + // fails because DataCite is down. We show the message below to set expectations that the + // "Publish" button won't work until the DOI has been reserved using the "Reserve PID" API. 
+ PidProvider pidProvider = PidUtil.getPidProvider(dataset.getGlobalId().getProviderId()); + if (DataCiteDOIProvider.TYPE.equals(pidProvider.getProviderType())) { + JH.addMessage(FacesMessage.SEVERITY_WARN, + BundleUtil.getStringFromBundle("dataset.locked.pidNotReserved.message"), + BundleUtil.getStringFromBundle("dataset.locked.pidNotReserved.message.details")); + } } - //if necessary refresh publish message also displayPublishMessage(); @@ -2450,6 +2558,10 @@ private DefaultTreeNode createFileTreeNode(FileMetadata fileMetadata, TreeNode p public boolean isVersionHasTabular() { return versionHasTabular; } + + public boolean isVersionHasGlobus() { + return versionHasGlobus; + } public boolean isReadOnly() { return readOnly; @@ -2601,8 +2713,7 @@ public void edit(EditMode editMode) { public String sendBackToContributor() { try { - //FIXME - Get Return Comment from sendBackToContributor popup - Command cmd = new ReturnDatasetToAuthorCommand(dvRequestService.getDataverseRequest(), dataset, ""); + Command cmd = new ReturnDatasetToAuthorCommand(dvRequestService.getDataverseRequest(), dataset, returnReason); dataset = commandEngine.submit(cmd); JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataset.reject.success")); } catch (CommandException ex) { @@ -3056,6 +3167,26 @@ public void setSelectedNonDownloadableFiles(List selectedNonDownlo this.selectedNonDownloadableFiles = selectedNonDownloadableFiles; } + private List selectedGlobusTransferableFiles; + + public List getSelectedGlobusTransferableFiles() { + return selectedGlobusTransferableFiles; + } + + public void setSelectedGlobusTransferableFiles(List selectedGlobusTransferableFiles) { + this.selectedGlobusTransferableFiles = selectedGlobusTransferableFiles; + } + + private List selectedNonGlobusTransferableFiles; + + public List getSelectedNonGlobusTransferableFiles() { + return selectedNonGlobusTransferableFiles; + } + + public void setSelectedNonGlobusTransferableFiles(List selectedNonGlobusTransferableFiles) { + this.selectedNonGlobusTransferableFiles = selectedNonGlobusTransferableFiles; + } + public String getSizeOfDataset() { return DatasetUtil.getDownloadSize(workingVersion, false); } @@ -3165,9 +3296,9 @@ public void startDownloadSelectedOriginal() { private void startDownload(boolean downloadOriginal){ boolean guestbookRequired = isDownloadPopupRequired(); - boolean validate = validateFilesForDownload(downloadOriginal); + boolean validate = validateFilesForDownload(downloadOriginal, false); if (validate) { - updateGuestbookResponse(guestbookRequired, downloadOriginal); + updateGuestbookResponse(guestbookRequired, downloadOriginal, false); if(!guestbookRequired && !getValidateFilesOutcome().equals("Mixed")){ startMultipleFileDownload(); } @@ -3188,7 +3319,7 @@ public void setValidateFilesOutcome(String validateFilesOutcome) { this.validateFilesOutcome = validateFilesOutcome; } - public boolean validateFilesForDownload(boolean downloadOriginal){ + public boolean validateFilesForDownload(boolean downloadOriginal, boolean isGlobusTransfer){ if (this.selectedFiles.isEmpty()) { PrimeFaces.current().executeScript("PF('selectFilesForDownload').show()"); return false; @@ -3205,35 +3336,43 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ return false; } - for (FileMetadata fmd : getSelectedDownloadableFiles()) { - DataFile dataFile = fmd.getDataFile(); - if (downloadOriginal && dataFile.isTabularData()) { - bytes += dataFile.getOriginalFileSize() == null ? 
0 : dataFile.getOriginalFileSize(); - } else { - bytes += dataFile.getFilesize(); + if (!isGlobusTransfer) { + for (FileMetadata fmd : getSelectedDownloadableFiles()) { + DataFile dataFile = fmd.getDataFile(); + if (downloadOriginal && dataFile.isTabularData()) { + bytes += dataFile.getOriginalFileSize() == null ? 0 : dataFile.getOriginalFileSize(); + } else { + bytes += dataFile.getFilesize(); + } } - } - //if there are two or more files with a total size - //over the zip limit post a "too large" popup - if (bytes > settingsWrapper.getZipDownloadLimit() && selectedDownloadableFiles.size() > 1) { - setValidateFilesOutcome("FailSize"); - return false; + // if there are two or more files, with a total size + // over the zip limit, post a "too large" popup + if (bytes > settingsWrapper.getZipDownloadLimit() && selectedDownloadableFiles.size() > 1) { + setValidateFilesOutcome("FailSize"); + return false; + } } - + // If some of the files were restricted and we had to drop them off the // list, and NONE of the files are left on the downloadable list - // - we show them a "you're out of luck" popup: - if (getSelectedDownloadableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { + // - we show them a "you're out of luck" popup + // Same for globus transfer + if ((!isGlobusTransfer + && (getSelectedDownloadableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty())) + || (isGlobusTransfer && (getSelectedGlobusTransferableFiles().isEmpty() + && !getSelectedNonGlobusTransferableFiles().isEmpty()))) { setValidateFilesOutcome("FailRestricted"); return false; } - if (!getSelectedDownloadableFiles().isEmpty() && !getSelectedNonDownloadableFiles().isEmpty()) { + //For download or transfer, there are some that can be downloaded/transferred and some that can't + if ((!isGlobusTransfer && (!getSelectedNonDownloadableFiles().isEmpty() && !getSelectedDownloadableFiles().isEmpty())) || + (isGlobusTransfer && (!getSelectedNonGlobusTransferableFiles().isEmpty() && !getSelectedGlobusTransferableFiles().isEmpty()))) { setValidateFilesOutcome("Mixed"); return true; } - + //ToDo - should Mixed not trigger this? if (isTermsPopupRequired() || isGuestbookPopupRequiredAtDownload()) { setValidateFilesOutcome("GuestbookRequired"); } @@ -3241,15 +3380,23 @@ public boolean validateFilesForDownload(boolean downloadOriginal){ } - private void updateGuestbookResponse (boolean guestbookRequired, boolean downloadOriginal) { + private void updateGuestbookResponse (boolean guestbookRequired, boolean downloadOriginal, boolean isGlobusTransfer) { // Note that the GuestbookResponse object may still have information from // the last download action performed by the user. For example, it may // still have the non-null Datafile in it, if the user has just downloaded // a single file; or it may still have the format set to "original" - // even if that's not what they are trying to do now. 
// So make sure to reset these values: - guestbookResponse.setDataFile(null); - guestbookResponse.setSelectedFileIds(getSelectedDownloadableFilesIdsString()); + if(fileMetadataForAction == null) { + guestbookResponse.setDataFile(null); + } else { + guestbookResponse.setDataFile(fileMetadataForAction.getDataFile()); + } + if(isGlobusTransfer) { + guestbookResponse.setSelectedFileIds(getFilesIdsString(getSelectedGlobusTransferableFiles())); + } else { + guestbookResponse.setSelectedFileIds(getSelectedDownloadableFilesIdsString()); + } if (downloadOriginal) { guestbookResponse.setFileFormat("original"); } else { @@ -3269,15 +3416,32 @@ private boolean filterSelectedFiles(){ setSelectedNonDownloadableFiles(new ArrayList<>()); setSelectedRestrictedFiles(new ArrayList<>()); setSelectedUnrestrictedFiles(new ArrayList<>()); + setSelectedGlobusTransferableFiles(new ArrayList<>()); + setSelectedNonGlobusTransferableFiles(new ArrayList<>()); boolean someFiles = false; + boolean globusDownloadEnabled = settingsWrapper.isGlobusDownload(); for (FileMetadata fmd : this.selectedFiles){ - if(this.fileDownloadHelper.canDownloadFile(fmd)){ + boolean downloadable=this.fileDownloadHelper.canDownloadFile(fmd); + + boolean globusTransferable = false; + if(globusDownloadEnabled) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + globusTransferable = GlobusAccessibleStore.isGlobusAccessible(driverId); + downloadable = downloadable && StorageIO.isDataverseAccessible(driverId); + } + if(downloadable){ getSelectedDownloadableFiles().add(fmd); someFiles=true; } else { getSelectedNonDownloadableFiles().add(fmd); } + if(globusTransferable) { + getSelectedGlobusTransferableFiles().add(fmd); + someFiles=true; + } else { + getSelectedNonGlobusTransferableFiles().add(fmd); + } if(fmd.isRestricted()){ getSelectedRestrictedFiles().add(fmd); //might be downloadable to user or not someFiles=true; @@ -3562,6 +3726,25 @@ public String deleteFiles() throws CommandException{ } } + //Remove retentions that are no longer referenced + //Identify which ones are involved here + List orphanedRetentions = new ArrayList(); + if (selectedFiles != null && selectedFiles.size() > 0) { + for (FileMetadata fmd : workingVersion.getFileMetadatas()) { + for (FileMetadata fm : selectedFiles) { + if (fm.getDataFile().equals(fmd.getDataFile()) && !fmd.getDataFile().isReleased()) { + Retention ret = fmd.getDataFile().getRetention(); + if (ret != null) { + ret.getDataFiles().remove(fmd.getDataFile()); + if (ret.getDataFiles().isEmpty()) { + orphanedRetentions.add(ret); + } + } + } + } + } + } + deleteFiles(filesToDelete); String retVal; @@ -3571,12 +3754,14 @@ public String deleteFiles() throws CommandException{ } else { retVal = save(); } - - - //And delete them only after the dataset is updated + + // And delete them only after the dataset is updated for(Embargo emb: orphanedEmbargoes) { embargoService.deleteById(emb.getId(), ((AuthenticatedUser)session.getUser()).getUserIdentifier()); } + for(Retention ret: orphanedRetentions) { + retentionService.delete(ret, ((AuthenticatedUser)session.getUser()).getUserIdentifier()); + } return retVal; } @@ -3766,12 +3951,6 @@ public String save() { ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); } dataset = commandEngine.submit(cmd); - for (DatasetField df : dataset.getLatestVersion().getFlatDatasetFields()) { - logger.fine("Found id: " + df.getDatasetFieldType().getId()); - if 
(fieldService.getCVocConf(true).containsKey(df.getDatasetFieldType().getId())) { - fieldService.registerExternalVocabValues(df); - } - } if (editMode == EditMode.CREATE) { if (session.getUser() instanceof AuthenticatedUser) { userNotificationService.sendNotification((AuthenticatedUser) session.getUser(), dataset.getCreateDate(), UserNotification.Type.CREATEDS, dataset.getLatestVersion().getId()); @@ -5215,35 +5394,6 @@ public boolean isFileAccessRequestMultiButtonEnabled(){ return false; } - private Boolean downloadButtonAllEnabled = null; - - public boolean isDownloadAllButtonEnabled() { - - if (downloadButtonAllEnabled == null) { - for (FileMetadata fmd : workingVersion.getFileMetadatas()) { - if (!this.fileDownloadHelper.canDownloadFile(fmd)) { - downloadButtonAllEnabled = false; - break; - } - } - downloadButtonAllEnabled = true; - } - return downloadButtonAllEnabled; - } - - public boolean isDownloadSelectedButtonEnabled(){ - - if( this.selectedFiles == null || this.selectedFiles.isEmpty() ){ - return false; - } - for (FileMetadata fmd : this.selectedFiles){ - if (this.fileDownloadHelper.canDownloadFile(fmd)){ - return true; - } - } - return false; - } - public boolean isFileAccessRequestMultiSignUpButtonRequired(){ if (isSessionUserAuthenticated()){ return false; @@ -5272,7 +5422,7 @@ public boolean isFileAccessRequestMultiSignUpButtonEnabled(){ return false; } for (FileMetadata fmd : this.selectedRestrictedFiles){ - if (!this.fileDownloadHelper.canDownloadFile(fmd)&& !FileUtil.isActivelyEmbargoed(fmd)){ + if (!this.fileDownloadHelper.canDownloadFile(fmd) && !FileUtil.isActivelyEmbargoed(fmd)){ return true; } } @@ -5633,7 +5783,10 @@ public boolean isShowPreviewButton(Long fileId) { public boolean isShowQueryButton(Long fileId) { DataFile dataFile = datafileService.find(fileId); - if(dataFile.isRestricted() || !dataFile.isReleased() || FileUtil.isActivelyEmbargoed(dataFile)){ + if(dataFile.isRestricted() + || !dataFile.isReleased() + || FileUtil.isActivelyEmbargoed(dataFile) + || FileUtil.isRetentionExpired(dataFile)){ return false; } @@ -5731,6 +5884,19 @@ public boolean isThisLatestReleasedVersion() { } + public String getCroissant() { + if (isThisLatestReleasedVersion()) { + final String CROISSANT_SCHEMA_NAME = "croissant"; + ExportService instance = ExportService.getInstance(); + String croissant = instance.getExportAsString(dataset, CROISSANT_SCHEMA_NAME); + if (croissant != null && !croissant.isEmpty()) { + logger.fine("Returning cached CROISSANT."); + return croissant; + } + } + return null; + } + public String getJsonLd() { if (isThisLatestReleasedVersion()) { ExportService instance = ExportService.getInstance(); @@ -5910,14 +6076,7 @@ public void setFolderPresort(boolean folderPresort) { public void explore(ExternalTool externalTool) { ApiToken apiToken = null; User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privUrl.getToken()); - } + apiToken = authService.getValidApiTokenForUser(user); ExternalToolHandler externalToolHandler = new ExternalToolHandler(externalTool, dataset, apiToken, session.getLocaleCode()); PrimeFaces.current().executeScript(externalToolHandler.getExploreScript()); } @@ -5925,8 +6084,9 @@ public void 
explore(ExternalTool externalTool) { public void configure(ExternalTool externalTool) { ApiToken apiToken = null; User user = session.getUser(); + //Not enabled for PrivateUrlUsers (who wouldn't have write permissions anyway) if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); + apiToken = authService.getValidApiTokenForAuthenticatedUser((AuthenticatedUser) user); } ExternalToolHandler externalToolHandler = new ExternalToolHandler(externalTool, dataset, apiToken, session.getLocaleCode()); PrimeFaces.current().executeScript(externalToolHandler.getConfigureScript()); @@ -6184,12 +6344,18 @@ public void clearSelectionEmbargo() { PrimeFaces.current().resetInputs("datasetForm:embargoInputs"); } - public boolean isCantDownloadDueToEmbargo() { + public boolean isCantDownloadDueToEmbargoOrDVAccess() { if (getSelectedNonDownloadableFiles() != null) { for (FileMetadata fmd : getSelectedNonDownloadableFiles()) { if (FileUtil.isActivelyEmbargoed(fmd)) { return true; } + if (isVersionHasGlobus()) { + if (StorageIO.isDataverseAccessible( + DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()))) { + return true; + } + } } } return false; @@ -6215,6 +6381,195 @@ private boolean containsOnlyActivelyEmbargoedFiles(List selectedFi return true; } + public Retention getSelectionRetention() { + return selectionRetention; + } + + public void setSelectionRetention(Retention selectionRetention) { + this.selectionRetention = selectionRetention; + } + + + private Retention selectionRetention = new Retention(); + + public boolean isValidRetentionSelection() { + //If fileMetadataForAction is set, someone is using the kebab/single file menu + if (fileMetadataForAction != null) { + if (!fileMetadataForAction.getDataFile().isReleased()) { + return true; + } else { + return false; + } + } + //Otherwise we check the selected files + for (FileMetadata fmd : selectedFiles) { + if (!fmd.getDataFile().isReleased()) { + return true; + } + } + return false; + } + + /* + * This method checks to see if the selected file/files have a retention that could be removed. It doesn't return true if a released file has a retention. + */ + public boolean isExistingRetention() { + if (fileMetadataForAction != null) { + if (!fileMetadataForAction.getDataFile().isReleased() + && (fileMetadataForAction.getDataFile().getRetention() != null)) { + return true; + } else { + return false; + } + } + for (FileMetadata fmd : selectedFiles) { + if (!fmd.getDataFile().isReleased() && (fmd.getDataFile().getRetention() != null)) { + return true; + } + } + + return false; + } + + public boolean isRetentionExpired(List fmdList) { + return FileUtil.isRetentionExpired(fmdList); + } + + public boolean isRetentionForWholeSelection() { + for (FileMetadata fmd : selectedFiles) { + if (fmd.getDataFile().isReleased()) { + return false; + } + } + return true; + } + + private boolean removeRetention=false; + + public boolean isRemoveRetention() { + return removeRetention; + } + + public void setRemoveRetention(boolean removeRetention) { + boolean existing = this.removeRetention; + this.removeRetention = removeRetention; + //If we flipped the state, update the selectionRetention. Otherwise (e.g. 
when save is hit) don't make changes + if(existing != this.removeRetention) { + logger.fine("State flip"); + selectionRetention= new Retention(); + if(removeRetention) { + logger.fine("Setting empty retention"); + selectionRetention= new Retention(null, null); + } + PrimeFaces.current().resetInputs("datasetForm:retentionInputs"); + } + } + + public String saveRetention() { + if (workingVersion.isReleased()) { + refreshSelectedFiles(selectedFiles); + } + + if(isRemoveRetention() || (selectionRetention.getDateUnavailable()==null && selectionRetention.getReason()==null)) { + selectionRetention=null; + } + + if(!(selectionRetention==null || (selectionRetention!=null && settingsWrapper.isValidRetentionDate(selectionRetention)))) { + logger.fine("Validation error: " + selectionRetention.getFormattedDateUnavailable()); + FacesContext.getCurrentInstance().validationFailed(); + return ""; + } + List orphanedRetentions = new ArrayList(); + List retentionFMs = null; + if (fileMetadataForAction != null) { + retentionFMs = new ArrayList(); + retentionFMs.add(fileMetadataForAction); + } else if (selectedFiles != null && selectedFiles.size() > 0) { + retentionFMs = selectedFiles; + } + + if(retentionFMs!=null && !retentionFMs.isEmpty()) { + if(selectionRetention!=null) { + selectionRetention = retentionService.merge(selectionRetention); + } + for (FileMetadata fmd : workingVersion.getFileMetadatas()) { + for (FileMetadata fm : retentionFMs) { + if (fm.getDataFile().equals(fmd.getDataFile()) && (isSuperUser()||!fmd.getDataFile().isReleased())) { + Retention ret = fmd.getDataFile().getRetention(); + if (ret != null) { + logger.fine("Before: " + ret.getDataFiles().size()); + ret.getDataFiles().remove(fmd.getDataFile()); + if (ret.getDataFiles().isEmpty()) { + orphanedRetentions.add(ret); + } + logger.fine("After: " + ret.getDataFiles().size()); + } + fmd.getDataFile().setRetention(selectionRetention); + } + } + } + } + if (selectionRetention != null) { + retentionService.save(selectionRetention, ((AuthenticatedUser) session.getUser()).getIdentifier()); + } + // success message: + String successMessage = BundleUtil.getStringFromBundle("file.assignedRetention.success"); + logger.fine(successMessage); + successMessage = successMessage.replace("{0}", "Selected Files"); + JsfHelper.addFlashMessage(successMessage); + selectionRetention = new Retention(); + + save(); + for(Retention ret: orphanedRetentions) { + retentionService.delete(ret, ((AuthenticatedUser)session.getUser()).getUserIdentifier()); + } + return returnToDraftVersion(); + } + + public void clearRetentionPopup() { + logger.fine("clearRetentionPopup called"); + selectionRetention= new Retention(); + setRemoveRetention(false); + PrimeFaces.current().resetInputs("datasetForm:retentionInputs"); + } + + public void clearSelectionRetention() { + logger.fine("clearSelectionRetention called"); + selectionRetention= new Retention(); + PrimeFaces.current().resetInputs("datasetForm:retentionInputs"); + } + + public boolean isCantDownloadDueToRetention() { + if (getSelectedNonDownloadableFiles() != null) { + for (FileMetadata fmd : getSelectedNonDownloadableFiles()) { + if (FileUtil.isRetentionExpired(fmd)) { + return true; + } + } + } + return false; + } + + public boolean isCantRequestDueToRetention() { + if (fileDownloadHelper.getFilesForRequestAccess() != null) { + for (DataFile df : fileDownloadHelper.getFilesForRequestAccess()) { + if (FileUtil.isRetentionExpired(df)) { + return true; + } + } + } + return false; + } + + private boolean 
containsOnlyRetentionExpiredFiles(List selectedFiles) { + for (FileMetadata fmd : selectedFiles) { + if (!FileUtil.isRetentionExpired(fmd)) { + return false; + } + } + return true; + } + public String getIngestMessage() { return BundleUtil.getStringFromBundle("file.ingestFailed.message", Arrays.asList(settingsWrapper.getGuidesBaseUrl(), settingsWrapper.getGuidesVersion())); } @@ -6250,18 +6605,50 @@ public boolean isHasPublicStore() { return settingsWrapper.isTrueForKey(SettingsServiceBean.Key.PublicInstall, StorageIO.isPublicStore(dataset.getEffectiveStorageDriverId())); } - public void startGlobusTransfer() { - ApiToken apiToken = null; - User user = session.getUser(); - if (user instanceof AuthenticatedUser) { - apiToken = authService.findApiTokenByUser((AuthenticatedUser) user); - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privUrl.getToken()); + public boolean isGlobusTransferRequested() { + return globusTransferRequested; + } + + /** + * Analogous to the startDownload method, this method is called when the user + * tries to start a Globus transfer out (~download). The + * validateFilesForDownload call checks to see if there are some files that can + * be Globus transferred and, if so and there are no files that can't be + * transferred, this method will launch the Globus transfer app. If there is a + * mix of files or if the guestbook popup is required, the method passes back to + * the UI so those popup(s) can be shown. Once they are shown, this method is called + * with the popupShown param set to true and the app will be launched. + * + * @param transferAll - when called from the dataset Access menu, this should be + * true so that all files are included in the processing. + * When it is called from the file table, the current + * selection is used and the param should be false. + * @param popupShown - This method is called twice if the mixed files or + * guestbook popups are needed. On the first call, popupShown + * is false so that the transfer is not started and those + * popups can be shown. On the second call, popupShown is + * true and processing will occur as long as there are some + * valid files to transfer. + */
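A sketch of the two-phase calling sequence described in the javadoc, assuming a hypothetical backing-bean reference named datasetPage:

```java
// Illustrative only. First call, e.g. from the dataset Access menu:
// validation runs, and if a guestbook or mixed-files popup is needed,
// nothing is transferred yet and the UI shows the popup(s).
datasetPage.startGlobusTransfer(true, false);

// Second call, wired to the popup's confirm button: popupShown=true,
// so the Globus app is launched for the transferable files.
datasetPage.startGlobusTransfer(true, true);
```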
+ public void startGlobusTransfer(boolean transferAll, boolean popupShown) { + if (transferAll) { + this.setSelectedFiles(workingVersion.getFileMetadatas()); + } + boolean guestbookRequired = isDownloadPopupRequired(); + + boolean validated = validateFilesForDownload(true, true); + + if (validated) { + globusTransferRequested = true; + boolean mixed = "Mixed".equals(getValidateFilesOutcome()); + // transfer is possible; update the guestbook response before showing any popups or launching the app + updateGuestbookResponse(guestbookRequired, true, true); + if ((!guestbookRequired && !mixed) || popupShown) { + boolean doNotSaveGuestbookResponse = workingVersion.isDraft(); + globusService.writeGuestbookAndStartTransfer(guestbookResponse, doNotSaveGuestbookResponse); + globusTransferRequested = false; + } } - PrimeFaces.current().executeScript(globusService.getGlobusDownloadScript(dataset, apiToken)); } public String getWebloaderUrlForDataset(Dataset d) { @@ -6301,5 +6688,9 @@ public String getSignpostingLinkHeader() { } return signpostingLinkHeader; } + + public boolean isDOI() { + return AbstractDOIProvider.DOI_PROTOCOL.equals(dataset.getGlobalId().getProtocol()); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index c6df2a2e1ab..dab0ff43fcf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -39,11 +39,10 @@ import jakarta.ejb.TransactionAttributeType; import jakarta.inject.Named; import jakarta.persistence.EntityManager; -import jakarta.persistence.LockModeType; import jakarta.persistence.NoResultException; +import jakarta.persistence.NonUniqueResultException; import jakarta.persistence.PersistenceContext; import jakarta.persistence.Query; -import jakarta.persistence.StoredProcedureQuery; import jakarta.persistence.TypedQuery; import org.apache.commons.lang3.StringUtils; @@ -61,9 +60,6 @@ public class DatasetServiceBean implements java.io.Serializable { @EJB IndexServiceBean indexService; - @EJB - DOIEZIdServiceBean doiEZIdServiceBean; - @EJB SettingsServiceBean settingsService; @@ -116,26 +112,32 @@ public Dataset find(Object pk) { * @return a dataset with pre-fetched file objects */ public Dataset findDeep(Object pk) { - return (Dataset) em.createNamedQuery("Dataset.findById") - .setParameter("id", pk) - // Optimization hints: retrieve all data in one query; this prevents point queries when iterating over the files - .setHint("eclipselink.left-join-fetch", "o.files.ingestRequest") - .setHint("eclipselink.left-join-fetch", "o.files.thumbnailForDataset") - .setHint("eclipselink.left-join-fetch", "o.files.dataTables") - .setHint("eclipselink.left-join-fetch", "o.files.auxiliaryFiles") - .setHint("eclipselink.left-join-fetch", "o.files.ingestReports") - .setHint("eclipselink.left-join-fetch", "o.files.dataFileTags") - .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas") - .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas.fileCategories") - //.setHint("eclipselink.left-join-fetch", "o.files.guestbookResponses") - .setHint("eclipselink.left-join-fetch", "o.files.embargo") - .setHint("eclipselink.left-join-fetch", "o.files.fileAccessRequests") - .setHint("eclipselink.left-join-fetch", "o.files.owner") - .setHint("eclipselink.left-join-fetch", "o.files.releaseUser") - .setHint("eclipselink.left-join-fetch", "o.files.creator") - .setHint("eclipselink.left-join-fetch", "o.files.alternativePersistentIndentifiers") - .setHint("eclipselink.left-join-fetch", 
"o.files.roleAssignments") - .getSingleResult(); + try { + return (Dataset) em.createNamedQuery("Dataset.findById") + .setParameter("id", pk) + // Optimization hints: retrieve all data in one query; this prevents point queries when iterating over the files + .setHint("eclipselink.left-join-fetch", "o.files.ingestRequest") + .setHint("eclipselink.left-join-fetch", "o.files.thumbnailForDataset") + .setHint("eclipselink.left-join-fetch", "o.files.dataTables") + .setHint("eclipselink.left-join-fetch", "o.files.auxiliaryFiles") + .setHint("eclipselink.left-join-fetch", "o.files.ingestReports") + .setHint("eclipselink.left-join-fetch", "o.files.dataFileTags") + .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas") + .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas.fileCategories") + .setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas.varGroups") + //.setHint("eclipselink.left-join-fetch", "o.files.guestbookResponses + .setHint("eclipselink.left-join-fetch", "o.files.embargo") + .setHint("eclipselink.left-join-fetch", "o.files.retention") + .setHint("eclipselink.left-join-fetch", "o.files.fileAccessRequests") + .setHint("eclipselink.left-join-fetch", "o.files.owner") + .setHint("eclipselink.left-join-fetch", "o.files.releaseUser") + .setHint("eclipselink.left-join-fetch", "o.files.creator") + .setHint("eclipselink.left-join-fetch", "o.files.alternativePersistentIndentifiers") + .setHint("eclipselink.left-join-fetch", "o.files.roleAssignments") + .getSingleResult(); + } catch (NoResultException | NonUniqueResultException ex) { + return null; + } } public List findByOwnerId(Long ownerId) { @@ -700,7 +702,7 @@ public void exportAllDatasets(boolean forceReExport) { Integer countError = 0; String logTimestamp = logFormatter.format(new Date()); Logger exportLogger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.DatasetServiceBean." + "ExportAll" + logTimestamp); - String logFileName = "../logs" + File.separator + "export_" + logTimestamp + ".log"; + String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "export_" + logTimestamp + ".log"; FileHandler fileHandler; boolean fileHandlerSuceeded; try { @@ -861,18 +863,33 @@ public Dataset setDatasetFileAsThumbnail(Dataset dataset, DataFile datasetFileTh logger.fine("In setDatasetFileAsThumbnail but dataset is null! Returning null."); return null; } + // Just in case the previously designated thumbnail for the dataset was + // a "custom" kind, i.e. an uploaded "dataset_logo" file, the following method + // will try to delete it, and all the associated caches here (because there + // are no other uses for the file). This method is apparently called in all + // cases, without trying to check if the dataset was in fact using a custom + // logo; probably under the assumption that it can't hurt. DatasetUtil.deleteDatasetLogo(dataset); dataset.setThumbnailFile(datasetFileThumbnailToSwitchTo); dataset.setUseGenericThumbnail(false); return merge(dataset); } - public Dataset removeDatasetThumbnail(Dataset dataset) { + public Dataset clearDatasetLevelThumbnail(Dataset dataset) { if (dataset == null) { - logger.fine("In removeDatasetThumbnail but dataset is null! Returning null."); + logger.fine("In clearDatasetLevelThumbnail but dataset is null! Returning null."); return null; } + + // Just in case the thumbnail that was designated for the dataset was + // a "custom logo" kind, i.e. 
an uploaded "dataset_logo" file, the following method + // will try to delete it, and all the associated caches here (because there + // are no other uses for the file). This method is apparently called in all + // cases, without trying to check if the dataset was in fact using a custom + // logo; probably under the assumption that it can't hurt. DatasetUtil.deleteDatasetLogo(dataset); + + // Clear any designated thumbnails for the dataset: dataset.setThumbnailFile(null); dataset.setUseGenericThumbnail(true); return merge(dataset); @@ -940,80 +957,6 @@ public void callFinalizePublishCommandAsynchronously(Long datasetId, CommandCont } } - /* - Experimental asynchronous method for requesting persistent identifiers for - datafiles. We decided not to run this method on upload/create (so files - will not have persistent ids while in draft; when the draft is published, - we will force obtaining persistent ids for all the files in the version. - - If we go back to trying to register global ids on create, care will need to - be taken to make sure the asynchronous changes below are not conflicting with - the changes from file ingest (which may be happening in parallel, also - asynchronously). We would also need to lock the dataset (similarly to how - tabular ingest logs the dataset), to prevent the user from publishing the - version before all the identifiers get assigned - otherwise more conflicts - are likely. (It sounds like it would make sense to treat these two tasks - - persistent identifiers for files and ingest - as one post-upload job, so that - they can be run in sequence). -- L.A. Mar. 2018 - */ - @Asynchronous - public void obtainPersistentIdentifiersForDatafiles(Dataset dataset) { - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(dataset.getProtocol(), commandEngine.getContext()); - - //If the Id type is sequential and Dependent then write file idenitifiers outside the command - String datasetIdentifier = dataset.getIdentifier(); - Long maxIdentifier = null; - - if (systemConfig.isDataFilePIDSequentialDependent()) { - maxIdentifier = getMaximumExistingDatafileIdentifier(dataset); - } - - for (DataFile datafile : dataset.getFiles()) { - logger.info("Obtaining persistent id for datafile id=" + datafile.getId()); - - if (datafile.getIdentifier() == null || datafile.getIdentifier().isEmpty()) { - - logger.info("Obtaining persistent id for datafile id=" + datafile.getId()); - - if (maxIdentifier != null) { - maxIdentifier++; - datafile.setIdentifier(datasetIdentifier + "/" + maxIdentifier.toString()); - } else { - datafile.setIdentifier(idServiceBean.generateDataFileIdentifier(datafile)); - } - - if (datafile.getProtocol() == null) { - datafile.setProtocol(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, "")); - } - if (datafile.getAuthority() == null) { - datafile.setAuthority(settingsService.getValueForKey(SettingsServiceBean.Key.Authority, "")); - } - - logger.info("identifier: " + datafile.getIdentifier()); - - String doiRetString; - - try { - logger.log(Level.FINE, "creating identifier"); - doiRetString = idServiceBean.createIdentifier(datafile); - } catch (Throwable e) { - logger.log(Level.WARNING, "Exception while creating Identifier: " + e.getMessage(), e); - doiRetString = ""; - } - - // Check return value to make sure registration succeeded - if (!idServiceBean.registerWhenPublished() && doiRetString.contains(datafile.getIdentifier())) { - datafile.setIdentifierRegistered(true); - datafile.setGlobalIdCreateTime(new Date()); - } - - DataFile merged = 
em.merge(datafile); - merged = null; - } - - } - } - public long findStorageSize(Dataset dataset) throws IOException { return findStorageSize(dataset, false, GetDatasetStorageSizeCommand.Mode.STORAGE, null); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 5fd963f3931..943693355a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1728,7 +1728,36 @@ public List> validateRequired() { } public boolean isValid() { - return validate().isEmpty(); + // first clone to leave the original untouched + final DatasetVersion newVersion = this.cloneDatasetVersion(); + // initDatasetFields + newVersion.setDatasetFields(newVersion.initDatasetFields()); + // remove special "N/A" values and empty values + newVersion.removeEmptyValues(); + // check validity of present fields and detect missing mandatory fields + return newVersion.validate().isEmpty(); + } + + private void removeEmptyValues() { + if (this.getDatasetFields() != null) { + for (DatasetField dsf : this.getDatasetFields()) { + removeEmptyValues(dsf); + } + } + } + + private void removeEmptyValues(DatasetField dsf) { + if (dsf.getDatasetFieldType().isPrimitive()) { // primitive + final Iterator i = dsf.getDatasetFieldValues().iterator(); + while (i.hasNext()) { + final String v = i.next().getValue(); + if (StringUtils.isBlank(v) || DatasetField.NA_VALUE.equals(v)) { + i.remove(); + } + } + } else { + dsf.getDatasetFieldCompoundValues().forEach(cv -> cv.getChildDatasetFields().forEach(v -> removeEmptyValues(v))); + } } public Set validate() { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java index 78fd896c897..afcfafe976c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionFilesServiceBean.java @@ -42,6 +42,16 @@ public enum FileDownloadSizeMode { All, Original, Archival } + /** + * Given a DatasetVersion, returns its total file metadata count + * + * @param datasetVersion the DatasetVersion to access + * @return long value of total file metadata count + */ + public long getFileMetadataCount(DatasetVersion datasetVersion) { + return getFileMetadataCount(datasetVersion, new FileSearchCriteria(null, null, null, null, null)); + } + /** * Given a DatasetVersion, returns its total file metadata count * @@ -189,6 +199,32 @@ public long getFilesDownloadSize(DatasetVersion datasetVersion, FileSearchCriter }; } + /** + * Determines whether or not a DataFile is present in a DatasetVersion + * + * @param datasetVersion the DatasetVersion to check + * @param dataFile the DataFile to check + * @return boolean value + */ + public boolean isDataFilePresentInDatasetVersion(DatasetVersion datasetVersion, DataFile dataFile) { + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = criteriaBuilder.createQuery(Long.class); + Root dataFileRoot = criteriaQuery.from(DataFile.class); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + Root datasetVersionRoot = criteriaQuery.from(DatasetVersion.class); + criteriaQuery + .select(criteriaBuilder.count(dataFileRoot)) + .where(criteriaBuilder.and( + criteriaBuilder.equal(dataFileRoot.get("id"), dataFile.getId()), + criteriaBuilder.equal(datasetVersionRoot.get("id"), datasetVersion.getId()), 
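
Side note on the new zero-criteria getFileMetadataCount() overload above: it simply delegates with an all-null FileSearchCriteria, so the two calls below are interchangeable. A small sketch, with the service injection assumed:

    // Equivalent calls after this change (datasetVersionFilesService is
    // assumed to be an injected DatasetVersionFilesServiceBean):
    long total = datasetVersionFilesService.getFileMetadataCount(version);
    long same = datasetVersionFilesService.getFileMetadataCount(
            version, new FileSearchCriteria(null, null, null, null, null));
    assert total == same;
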
+ fileMetadataRoot.in(dataFileRoot.get("fileMetadatas")), + fileMetadataRoot.in(datasetVersionRoot.get("fileMetadatas")) + ) + ); + Long count = em.createQuery(criteriaQuery).getSingleResult(); + return count != null && count > 0; + } + private void addAccessStatusCountToTotal(DatasetVersion datasetVersion, Map totalCounts, FileAccessStatus dataFileAccessStatus, FileSearchCriteria searchCriteria) { long fileMetadataCount = getFileMetadataCountByAccessStatus(datasetVersion, dataFileAccessStatus, searchCriteria); if (fileMetadataCount > 0) { @@ -210,6 +246,8 @@ private long getFileMetadataCountByAccessStatus(DatasetVersion datasetVersion, F private Predicate createSearchCriteriaAccessStatusPredicate(FileAccessStatus accessStatus, CriteriaBuilder criteriaBuilder, Root fileMetadataRoot) { Path dataFile = fileMetadataRoot.get("dataFile"); + Path retention = dataFile.get("retention"); + Predicate retentionExpiredPredicate = criteriaBuilder.lessThan(retention.get("dateUnavailable"), criteriaBuilder.currentDate()); Path embargo = dataFile.get("embargo"); Predicate activelyEmbargoedPredicate = criteriaBuilder.greaterThanOrEqualTo(embargo.get("dateAvailable"), criteriaBuilder.currentDate()); Predicate inactivelyEmbargoedPredicate = criteriaBuilder.isNull(embargo); @@ -217,6 +255,7 @@ private Predicate createSearchCriteriaAccessStatusPredicate(FileAccessStatus acc Predicate isRestrictedPredicate = criteriaBuilder.isTrue(isRestricted); Predicate isUnrestrictedPredicate = criteriaBuilder.isFalse(isRestricted); return switch (accessStatus) { + case RetentionPeriodExpired -> criteriaBuilder.and(retentionExpiredPredicate); case EmbargoedThenRestricted -> criteriaBuilder.and(activelyEmbargoedPredicate, isRestrictedPredicate); case EmbargoedThenPublic -> criteriaBuilder.and(activelyEmbargoedPredicate, isUnrestrictedPredicate); case Restricted -> criteriaBuilder.and(inactivelyEmbargoedPredicate, isRestrictedPredicate); @@ -260,22 +299,27 @@ private Predicate createSearchCriteriaPredicate(DatasetVersion datasetVersion, return criteriaBuilder.and(predicates.toArray(new Predicate[]{})); } - private Order createGetFileMetadatasOrder(CriteriaBuilder criteriaBuilder, - FileOrderCriteria orderCriteria, - Root fileMetadataRoot) { + private List createGetFileMetadatasOrder(CriteriaBuilder criteriaBuilder, + FileOrderCriteria orderCriteria, + Root fileMetadataRoot) { Path label = fileMetadataRoot.get("label"); Path dataFile = fileMetadataRoot.get("dataFile"); Path publicationDate = dataFile.get("publicationDate"); Path createDate = dataFile.get("createDate"); Expression orderByLifetimeExpression = criteriaBuilder.selectCase().when(publicationDate.isNotNull(), publicationDate).otherwise(createDate); - return switch (orderCriteria) { - case NameZA -> criteriaBuilder.desc(label); - case Newest -> criteriaBuilder.desc(orderByLifetimeExpression); - case Oldest -> criteriaBuilder.asc(orderByLifetimeExpression); - case Size -> criteriaBuilder.asc(dataFile.get("filesize")); - case Type -> criteriaBuilder.asc(dataFile.get("contentType")); - default -> criteriaBuilder.asc(label); - }; + List orderList = new ArrayList<>(); + switch (orderCriteria) { + case NameZA -> orderList.add(criteriaBuilder.desc(label)); + case Newest -> orderList.add(criteriaBuilder.desc(orderByLifetimeExpression)); + case Oldest -> orderList.add(criteriaBuilder.asc(orderByLifetimeExpression)); + case Size -> orderList.add(criteriaBuilder.asc(dataFile.get("filesize"))); + case Type -> { + orderList.add(criteriaBuilder.asc(dataFile.get("contentType"))); + 
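
For readers less fluent in the Criteria API: the isDataFilePresentInDatasetVersion() query above is, roughly, the JPQL below, a cross join of the three entities narrowed to rows where a single FileMetadata links the file to the version. Illustrative hand-written equivalent, not generated output:

    // Hypothetical JPQL equivalent of the criteria query:
    TypedQuery<Long> q = em.createQuery(
            "SELECT COUNT(df) FROM DataFile df, FileMetadata fm, DatasetVersion dv"
          + " WHERE df.id = :fileId AND dv.id = :versionId"
          + " AND fm MEMBER OF df.fileMetadatas"
          + " AND fm MEMBER OF dv.fileMetadatas", Long.class);
    q.setParameter("fileId", dataFile.getId());
    q.setParameter("versionId", datasetVersion.getId());
    boolean present = q.getSingleResult() > 0;
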
orderList.add(criteriaBuilder.asc(label)); + } + default -> orderList.add(criteriaBuilder.asc(label)); + } + return orderList; } private long getOriginalTabularFilesSize(DatasetVersion datasetVersion, FileSearchCriteria searchCriteria) { diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java index cd3291e6222..ab23fa779d5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java @@ -163,6 +163,7 @@ public DatasetVersion findDeep(Object pk) { .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.dataTables") .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.fileCategories") .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.embargo") + .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.retention") .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.datasetVersion") .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.releaseUser") .setHint("eclipselink.left-join-fetch", "o.fileMetadatas.dataFile.creator") @@ -802,6 +803,7 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND fm.datafile_id = df.id " + "AND df.restricted = false " + "AND df.embargo_id is null " + + "AND df.retention_id is null " + "AND o.previewImageAvailable = true " + "ORDER BY df.id LIMIT 1;").getSingleResult(); } catch (Exception ex) { @@ -825,9 +827,10 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - // + "AND o.previewImageAvailable = false " + + "AND o.previewimagefail = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + + "AND df.retention_id is null " + "AND df.contenttype LIKE 'image/%' " + "AND NOT df.contenttype = 'image/fits' " + "AND df.filesize < " + imageThumbnailSizeLimit + " " @@ -859,9 +862,10 @@ public Long getThumbnailByVersionId(Long versionId) { + "AND df.id = o.id " + "AND fm.datasetversion_id = dv.id " + "AND fm.datafile_id = df.id " - // + "AND o.previewImageAvailable = false " + + "AND o.previewimagefail = false " + "AND df.restricted = false " + "AND df.embargo_id is null " + + "AND df.retention_id is null " + "AND df.contenttype = 'application/pdf' " + "AND df.filesize < " + imageThumbnailSizeLimit + " " + "ORDER BY df.filesize ASC LIMIT 1;").getSingleResult(); diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index 682c1dc6744..978c716e058 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -3,6 +3,7 @@ import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch; +import edu.harvard.iq.dataverse.storageuse.StorageUse; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -103,7 +104,11 @@ public enum DataverseType { * dataverses. 
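
Returning a List<Order> from createGetFileMetadatasOrder() is what makes the label tie-breaker for the Type criterion possible: CriteriaQuery.orderBy() accepts a whole list and applies it left to right. Consumer-side sketch, with names assumed from the surrounding class:

    // Multi-column ordering; for FileOrderCriteria.Type this now produces
    // ORDER BY contentType ASC, label ASC instead of contentType alone:
    List<Order> orders = createGetFileMetadatasOrder(criteriaBuilder, orderCriteria, fileMetadataRoot);
    criteriaQuery.orderBy(orders);
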
*/ protected boolean permissionRoot; - + + public Dataverse() { + StorageUse storageUse = new StorageUse(this); + this.setStorageUse(storageUse); + } public DataverseType getDataverseType() { return dataverseType; @@ -406,6 +411,20 @@ public List getDataverseFieldTypeInputLevels() { return dataverseFieldTypeInputLevels; } + public boolean isDatasetFieldTypeRequiredAsInputLevel(Long datasetFieldTypeId) { + return dataverseFieldTypeInputLevels.stream() + .anyMatch(inputLevel -> inputLevel.getDatasetFieldType().getId().equals(datasetFieldTypeId) && inputLevel.isRequired()); + } + + public boolean isDatasetFieldTypeIncludedAsInputLevel(Long datasetFieldTypeId) { + return dataverseFieldTypeInputLevels.stream() + .anyMatch(inputLevel -> inputLevel.getDatasetFieldType().getId().equals(datasetFieldTypeId) && inputLevel.isInclude()); + } + + public boolean isDatasetFieldTypeInInputLevels(Long datasetFieldTypeId) { + return dataverseFieldTypeInputLevels.stream() + .anyMatch(inputLevel -> inputLevel.getDatasetFieldType().getId().equals(datasetFieldTypeId)); + } public Template getDefaultTemplate() { return defaultTemplate; @@ -461,9 +480,6 @@ public void setTemplateRoot(boolean templateRoot) { this.templateRoot = templateRoot; } - - - public List getMetadataBlocks() { return getMetadataBlocks(false); } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java index c4749be0cb3..a3425987bf8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java @@ -30,8 +30,9 @@ @NamedQuery(name = "DataverseFieldTypeInputLevel.findByDataverseIdDatasetFieldTypeId", query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id = :datasetFieldTypeId"), @NamedQuery(name = "DataverseFieldTypeInputLevel.findByDataverseIdAndDatasetFieldTypeIdList", - query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id in :datasetFieldIdList") - + query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id in :datasetFieldIdList"), + @NamedQuery(name = "DataverseFieldTypeInputLevel.findRequiredByDataverseId", + query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.required = 'true' ") }) @Table(name="DataverseFieldTypeInputLevel" , uniqueConstraints={ diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java index 66c700f59ce..1bd290ecc4d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java @@ -88,6 +88,16 @@ public DataverseFieldTypeInputLevel findByDataverseIdDatasetFieldTypeId(Long dat return null; } } + + public List findRequiredByDataverseId(Long dataverseId) { + Query query = em.createNamedQuery("DataverseFieldTypeInputLevel.findRequiredByDataverseId", DataverseFieldTypeInputLevel.class); + query.setParameter("dataverseId", dataverseId); + try{ + return query.getResultList(); + } catch ( NoResultException nre ) { + return null; + } + } public void delete(DataverseFieldTypeInputLevel dataverseFieldTypeInputLevel) { 
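
The three stream helpers added to Dataverse above turn input-level lookups into one-liners at call sites. A hypothetical caller:

    // Hypothetical call site; datasetFieldType and dataverse are assumed in scope:
    Long fieldTypeId = datasetFieldType.getId();
    if (dataverse.isDatasetFieldTypeInInputLevels(fieldTypeId)) {
        // Collection-level overrides exist for this field type:
        boolean required = dataverse.isDatasetFieldTypeRequiredAsInputLevel(fieldTypeId);
        boolean included = dataverse.isDatasetFieldTypeIncludedAsInputLevel(fieldTypeId);
        // ... apply these instead of the field type's global defaults ...
    }
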
em.remove(em.merge(dataverseFieldTypeInputLevel)); diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java index 943a74327d5..351d304bad3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java @@ -9,12 +9,16 @@ import edu.harvard.iq.dataverse.dataverse.DataverseUtil; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.impl.CheckRateLimitForCollectionPageCommand; import edu.harvard.iq.dataverse.engine.command.impl.CreateDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.CreateSavedSearchCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.LinkDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.PublishDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseCommand; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.search.FacetCategory; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.search.SearchFields; @@ -28,15 +32,20 @@ import static edu.harvard.iq.dataverse.util.JsfHelper.JH; import edu.harvard.iq.dataverse.util.SystemConfig; import java.util.List; + +import edu.harvard.iq.dataverse.util.cache.CacheFactoryBean; import jakarta.ejb.EJB; import jakarta.faces.application.FacesMessage; import jakarta.faces.context.FacesContext; import jakarta.faces.view.ViewScoped; import jakarta.inject.Inject; import jakarta.inject.Named; + +import java.util.AbstractMap; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set; @@ -109,7 +118,13 @@ public enum LinkMode { @EJB DataverseLinkingServiceBean linkingService; @Inject PermissionsWrapper permissionsWrapper; - @Inject DataverseHeaderFragment dataverseHeaderFragment; + @Inject + NavigationWrapper navigationWrapper; + @Inject DataverseHeaderFragment dataverseHeaderFragment; + @EJB + PidProviderFactoryBean pidProviderFactoryBean; + @EJB + CacheFactoryBean cacheFactory; private Dataverse dataverse = new Dataverse(); @@ -310,7 +325,10 @@ public void updateOwnerDataverse() { public String init() { //System.out.println("_YE_OLDE_QUERY_COUNTER_"); // for debug purposes - + // Check for rate limit exceeded. Must be done before anything else to prevent unnecessary processing. 
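
On the findRequiredByDataverseId() wrapper above: getResultList() returns an empty list rather than throwing NoResultException, so the catch branch is effectively dead and callers can rely on a non-null result. Hypothetical usage:

    // Collecting the names of a collection's locally-required field types:
    List<DataverseFieldTypeInputLevel> required =
            dataverseFieldTypeInputLevelService.findRequiredByDataverseId(dataverse.getId());
    List<String> requiredFieldNames = required.stream()
            .map(il -> il.getDatasetFieldType().getName())
            .collect(java.util.stream.Collectors.toList());
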
+ if (!cacheFactory.checkRate(session.getUser(), new CheckRateLimitForCollectionPageCommand(null,null))) { + return navigationWrapper.tooManyRequests(); + } if (this.getAlias() != null || this.getId() != null || this.getOwnerId() == null) {// view mode for a dataverse if (this.getAlias() != null) { dataverse = dataverseService.findByAlias(this.getAlias()); @@ -362,7 +380,7 @@ public void initFeaturedDataverses() { List featuredSource = new ArrayList<>(); List featuredTarget = new ArrayList<>(); featuredSource.addAll(dataverseService.findAllPublishedByOwnerId(dataverse.getId())); - featuredSource.addAll(linkingService.findLinkingDataverses(dataverse.getId())); + featuredSource.addAll(linkingService.findLinkedDataverses(dataverse.getId())); List featuredList = featuredDataverseService.findByDataverseId(dataverse.getId()); for (DataverseFeaturedDataverse dfd : featuredList) { Dataverse fd = dfd.getFeaturedDataverse(); @@ -1289,4 +1307,34 @@ public String getCurationLabelSetNameLabel() { public Set> getGuestbookEntryOptions() { return settingsWrapper.getGuestbookEntryOptions(this.dataverse).entrySet(); } + + public Set> getPidProviderOptions() { + PidProvider defaultPidProvider = pidProviderFactoryBean.getDefaultPidGenerator(); + Set providerIds = PidUtil.getManagedProviderIds(); + Set> options = new HashSet>(); + if (providerIds.size() > 1) { + + String label = null; + if (this.dataverse.getOwner() != null && this.dataverse.getOwner().getEffectivePidGenerator()!= null) { + PidProvider inheritedPidProvider = this.dataverse.getOwner().getEffectivePidGenerator(); + label = inheritedPidProvider.getLabel() + " " + BundleUtil.getStringFromBundle("dataverse.inherited") + ": " + + inheritedPidProvider.getProtocol() + ":" + inheritedPidProvider.getAuthority() + + inheritedPidProvider.getSeparator() + inheritedPidProvider.getShoulder(); + } else { + label = defaultPidProvider.getLabel() + " " + BundleUtil.getStringFromBundle("dataverse.default") + ": " + + defaultPidProvider.getProtocol() + ":" + defaultPidProvider.getAuthority() + + defaultPidProvider.getSeparator() + defaultPidProvider.getShoulder(); + } + Entry option = new AbstractMap.SimpleEntry("default", label); + options.add(option); + } + for (String providerId : providerIds) { + PidProvider pidProvider = PidUtil.getPidProvider(providerId); + String label = pidProvider.getLabel() + ": " + pidProvider.getProtocol() + ":" + pidProvider.getAuthority() + + pidProvider.getSeparator() + pidProvider.getShoulder(); + Entry option = new AbstractMap.SimpleEntry(providerId, label); + options.add(option); + } + return options; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 549b8310122..10b5d800c21 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -18,8 +18,11 @@ import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.search.SolrSearchResult; +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.File; import java.io.IOException; import java.sql.Timestamp; @@ -42,7 +45,15 @@ import jakarta.persistence.NonUniqueResultException; import 
jakarta.persistence.PersistenceContext; import jakarta.persistence.TypedQuery; +import java.nio.file.Files; +import java.nio.file.Paths; +import org.apache.commons.lang3.StringUtils; import org.apache.solr.client.solrj.SolrServerException; +import org.everit.json.schema.Schema; +import org.everit.json.schema.ValidationException; +import org.everit.json.schema.loader.SchemaLoader; +import org.json.JSONObject; +import org.json.JSONTokener; /** * @@ -80,6 +91,9 @@ public class DataverseServiceBean implements java.io.Serializable { @EJB PermissionServiceBean permissionService; + @EJB + DataverseFieldTypeInputLevelServiceBean dataverseFieldTypeInputLevelService; + @EJB SystemConfig systemConfig; @@ -346,51 +360,6 @@ public String getDataverseLogoThumbnailAsBase64ById(Long dvId) { } return null; } - - /* - public boolean isDataverseLogoThumbnailAvailable(Dataverse dataverse, User user) { - if (dataverse == null) { - return false; - } - - // First, check if the dataverse has a defined logo: - - //if (dataverse.getDataverseTheme() != null && dataverse.getDataverseTheme().getLogo() != null && !dataverse.getDataverseTheme().getLogo().equals("")) { - File dataverseLogoFile = getLogo(dataverse); - if (dataverseLogoFile != null) { - String logoThumbNailPath = null; - - if (dataverseLogoFile.exists()) { - logoThumbNailPath = ImageThumbConverter.generateImageThumbnailFromFile(dataverseLogoFile.getAbsolutePath(), 48); - if (logoThumbNailPath != null) { - return true; - } - } - } - //} - */ - // If there's no uploaded logo for this dataverse, go through its - // [released] datasets and see if any of them have card images: - // - // TODO: - // Discuss/Decide if we really want to do this - i.e., go through every - // file in every dataset below... - // -- L.A. 4.0 beta14 - /* - for (Dataset dataset : datasetService.findPublishedByOwnerId(dataverse.getId())) { - if (dataset != null) { - DatasetVersion releasedVersion = dataset.getReleasedVersion(); - - if (releasedVersion != null) { - if (datasetService.isDatasetCardImageAvailable(releasedVersion, user)) { - return true; - } - } - } } */ - /* - return false; - } */ private File getLogo(Dataverse dataverse) { if (dataverse.getId() == null) { @@ -919,5 +888,294 @@ public List getDatasetTitlesWithinDataverse(Long dataverseId) { return em.createNativeQuery(cqString).getResultList(); } + + public String getCollectionDatasetSchema(String dataverseAlias) { + + Dataverse testDV = this.findByAlias(dataverseAlias); + + while (!testDV.isMetadataBlockRoot()) { + if (testDV.getOwner() == null) { + break; // we are at the root, which by definition is metadata block root, regardless of the value + } + testDV = testDV.getOwner(); + } + + /* Couldn't get the 'return base schema if no extra required fields' shortcut to work with the path provided; + leaving it as 'out of scope' for now SEK 11/27/2023 + + List<DataverseFieldTypeInputLevel> required = new ArrayList<>(); + + required = dataverseFieldTypeInputLevelService.findRequiredByDataverseId(testDV.getId()); + + if (required == null || required.isEmpty()){ + String pathToJsonFile = "src/main/resources/edu/harvard/iq/dataverse/baseDatasetSchema.json"; + String baseSchema = getBaseSchemaStringFromFile(pathToJsonFile); + if (baseSchema != null && !baseSchema.isEmpty()){ + return baseSchema; + } + } + + */ + List<MetadataBlock> selectedBlocks = new ArrayList<>(); + List<DatasetFieldType> requiredDSFT = new ArrayList<>(); + + selectedBlocks.addAll(testDV.getMetadataBlocks()); + + for (MetadataBlock mdb : selectedBlocks) { + for (DatasetFieldType dsft : mdb.getDatasetFieldTypes()) { + if (!dsft.isChild()) { +
DataverseFieldTypeInputLevel dsfIl = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), dsft.getId()); + if (dsfIl != null) { + dsft.setRequiredDV(dsfIl.isRequired()); + dsft.setInclude(dsfIl.isInclude()); + } else { + dsft.setRequiredDV(dsft.isRequired()); + dsft.setInclude(true); + } + if (dsft.isHasChildren()) { + for (DatasetFieldType child : dsft.getChildDatasetFieldTypes()) { + DataverseFieldTypeInputLevel dsfIlChild = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), child.getId()); + if (dsfIlChild != null) { + child.setRequiredDV(dsfIlChild.isRequired()); + child.setInclude(dsfIlChild.isInclude()); + } else { + // in the case of conditionally required (child = true, parent = false) + // we set this to false; i.e this is the default "don't override" value + child.setRequiredDV(child.isRequired() && dsft.isRequired()); + child.setInclude(true); + } + } + } + if(dsft.isRequiredDV()){ + requiredDSFT.add(dsft); + } + } + } + + } + + String reqMDBNames = ""; + List hasReqFields = new ArrayList<>(); + String retval = datasetSchemaPreface; + for (MetadataBlock mdb : selectedBlocks) { + for (DatasetFieldType dsft : requiredDSFT) { + if (dsft.getMetadataBlock().equals(mdb)) { + hasReqFields.add(mdb); + if (!reqMDBNames.isEmpty()) reqMDBNames += ","; + reqMDBNames += "\"" + mdb.getName() + "\""; + break; + } + } + } + int countMDB = 0; + for (MetadataBlock mdb : hasReqFields) { + if (countMDB>0){ + retval += ","; + } + retval += getCustomMDBSchema(mdb, requiredDSFT); + countMDB++; + } + + retval += "\n }"; + + retval += endOfjson.replace("blockNames", reqMDBNames); + + return retval; + + } + + private String getCustomMDBSchema (MetadataBlock mdb, List requiredDSFT){ + String retval = ""; + boolean mdbHasReqField = false; + int numReq = 0; + List requiredThisMDB = new ArrayList<>(); + + for (DatasetFieldType dsft : requiredDSFT ){ + + if(dsft.getMetadataBlock().equals(mdb)){ + numReq++; + mdbHasReqField = true; + requiredThisMDB.add(dsft); + } + } + if (mdbHasReqField){ + retval += startOfMDB.replace("blockName", mdb.getName()); + + retval += minItemsTemplate.replace("numMinItems", Integer.toString(requiredThisMDB.size())); + int count = 0; + for (DatasetFieldType dsft:requiredThisMDB ){ + count++; + String reqValImp = reqValTemplate.replace("reqFieldTypeName", dsft.getName()); + if (count < requiredThisMDB.size()){ + retval += reqValImp + "\n"; + } else { + reqValImp = StringUtils.substring(reqValImp, 0, reqValImp.length() - 1); + retval += reqValImp+ "\n"; + retval += endOfReqVal; + } + } + + } + + return retval; + } + + public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { + JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias))); + + try { + Schema schema = SchemaLoader.load(rawSchema); + schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid + } catch (ValidationException vx) { + logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage()); + String accumulatedexceptions = ""; + for (ValidationException va : vx.getCausingExceptions()){ + accumulatedexceptions = accumulatedexceptions + va; + accumulatedexceptions = accumulatedexceptions.replace("org.everit.json.schema.ValidationException:", " "); + } + if (!accumulatedexceptions.isEmpty()){ + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; + } 
else { + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage(); + } + + } catch (Exception ex) { + logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage()); + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage(); + } + + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.succeeded"); + } + + static String getBaseSchemaStringFromFile(String pathToJsonFile) { + File datasetSchemaJson = new File(pathToJsonFile); + try { + String datasetSchemaAsJson = new String(Files.readAllBytes(Paths.get(datasetSchemaJson.getAbsolutePath()))); + return datasetSchemaAsJson; + } catch (IOException ex) { + logger.info("IO - failed to get schema file - will build on fly " +ex.getMessage()); + return null; + } catch (Exception e){ + logger.info("Other exception - failed to get schema file - will build on fly. " + e.getMessage()); + return null; + } + } + + private String datasetSchemaPreface = + "{\n" + + " \"$schema\": \"http://json-schema.org/draft-04/schema#\",\n" + + " \"$defs\": {\n" + + " \"field\": {\n" + + " \"type\": \"object\",\n" + + " \"required\": [\"typeClass\", \"multiple\", \"typeName\"],\n" + + " \"properties\": {\n" + + " \"value\": {\n" + + " \"anyOf\": [\n" + + " {\n" + + " \"type\": \"array\"\n" + + " },\n" + + " {\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"$ref\": \"#/$defs/field\"\n" + + " }\n" + + " ]\n" + + " },\n" + + " \"typeClass\": {\n" + + " \"type\": \"string\"\n" + + " },\n" + + " \"multiple\": {\n" + + " \"type\": \"boolean\"\n" + + " },\n" + + " \"typeName\": {\n" + + " \"type\": \"string\"\n" + + " }\n" + + " }\n" + + " }\n" + + "},\n" + + "\"type\": \"object\",\n" + + "\"properties\": {\n" + + " \"datasetVersion\": {\n" + + " \"type\": \"object\",\n" + + " \"properties\": {\n" + + " \"license\": {\n" + + " \"type\": \"object\",\n" + + " \"properties\": {\n" + + " \"name\": {\n" + + " \"type\": \"string\"\n" + + " },\n" + + " \"uri\": {\n" + + " \"type\": \"string\",\n" + + " \"format\": \"uri\"\n" + + " }\n" + + " },\n" + + " \"required\": [\"name\", \"uri\"]\n" + + " },\n" + + " \"metadataBlocks\": {\n" + + " \"type\": \"object\",\n" + + " \"properties\": {\n" + + "" ; + private String startOfMDB = "" + +" \"blockName\": {\n" + +" \"type\": \"object\",\n" + +" \"properties\": {\n" + +" \"fields\": {\n" + +" \"type\": \"array\",\n" + +" \"items\": {\n" + +" \"$ref\": \"#/$defs/field\"\n" + +" },"; + + private String reqValTemplate = " {\n" + +" \"contains\": {\n" + +" \"properties\": {\n" + +" \"typeName\": {\n" + +" \"const\": \"reqFieldTypeName\"\n" + +" }\n" + +" }\n" + +" }\n" + +" },"; + + private String minItemsTemplate = "\n \"minItems\": numMinItems,\n" + +" \"allOf\": [\n"; + private String endOfReqVal = " ]\n" + +" }\n" + +" },\n" + +" \"required\": [\"fields\"]\n" + +" }"; + + private String endOfjson = ",\n" + +" \"required\": [blockNames]\n" + +" }\n" + +" },\n" + +" \"required\": [\"metadataBlocks\"]\n" + +" }\n" + +" },\n" + +" \"required\": [\"datasetVersion\"]\n" + +"}\n"; + + public void saveStorageQuota(Dataverse target, Long allocation) { + StorageQuota storageQuota = target.getStorageQuota(); + + if (storageQuota != null) { + storageQuota.setAllocation(allocation); + em.merge(storageQuota); + } else { + storageQuota = new StorageQuota(); + storageQuota.setDefinitionPoint(target); + storageQuota.setAllocation(allocation); + target.setStorageQuota(storageQuota); 
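
isDatasetJsonValid() above is a thin wrapper around the everit-json-schema flow: SchemaLoader.load() compiles the schema, and validate() throws a ValidationException whose causing exceptions carry the individual violations. A self-contained sketch of that flow with a toy schema (illustrative values only):

    import org.everit.json.schema.Schema;
    import org.everit.json.schema.ValidationException;
    import org.everit.json.schema.loader.SchemaLoader;
    import org.json.JSONObject;
    import org.json.JSONTokener;

    public class SchemaCheckSketch {
        public static void main(String[] args) {
            String schemaJson = "{\"type\":\"object\",\"required\":[\"datasetVersion\"]}";
            Schema schema = SchemaLoader.load(new JSONObject(new JSONTokener(schemaJson)));
            try {
                schema.validate(new JSONObject("{}")); // throws on violation
            } catch (ValidationException vx) {
                // One entry per violated subschema, as accumulated in isDatasetJsonValid():
                vx.getCausingExceptions().forEach(v -> System.out.println(v.getMessage()));
                System.out.println(vx.getErrorMessage());
            }
        }
    }
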
+ em.persist(storageQuota); + } + em.flush(); + } + + public void disableStorageQuota(StorageQuota storageQuota) { + if (storageQuota != null && storageQuota.getAllocation() != null) { + storageQuota.setAllocation(null); + em.merge(storageQuota); + em.flush(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index 9e7f3f3fe96..cc5d7620969 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; import java.sql.Timestamp; import java.text.SimpleDateFormat; @@ -155,7 +156,7 @@ public String visit(DataFile df) { private String identifier; private boolean identifierRegistered; - + private transient GlobalId globalId = null; @OneToMany(mappedBy = "dvObject", cascade = CascadeType.ALL, orphanRemoval = true) @@ -177,6 +178,9 @@ public void setAlternativePersistentIndentifiers(Set roleAssignments; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java index f7d361d76f5..56d26a7260d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectContainer.java @@ -1,12 +1,20 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.storageuse.StorageUse; import edu.harvard.iq.dataverse.util.SystemConfig; -import java.util.Locale; +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; +import jakarta.persistence.CascadeType; import java.util.Optional; - import jakarta.persistence.MappedSuperclass; +import jakarta.persistence.OneToOne; +import jakarta.persistence.Transient; + import org.apache.commons.lang3.StringUtils; /** @@ -41,6 +49,14 @@ public boolean isEffectivelyPermissionRoot() { private Boolean guestbookAtRequest = null; + private String pidGeneratorSpecs = null; + + @Transient + private PidProvider pidGenerator = null; + + @OneToOne(mappedBy = "dvObjectContainer",cascade={ CascadeType.REMOVE, CascadeType.PERSIST}, orphanRemoval=true) + private StorageUse storageUse; + public String getEffectiveStorageDriverId() { String id = storageDriver; if (StringUtils.isBlank(id)) { @@ -160,5 +176,89 @@ public String getCurationLabelSetName() { public void setCurationLabelSetName(String setName) { this.externalLabelSetName = setName; } + + /** + * Should only be used in constructors for DvObjectContainers (Datasets and + * Collections), to make sure new entries are created and persisted in the + * database StorageUse table for every DvObject container we create. + * @param storageUse + */ + public void setStorageUse(StorageUse storageUse) { + this.storageUse = storageUse; + } + + + /* Dataverse collections and dataset can be configured to use different PidProviders as PID generators for contained objects (datasets or data files). + * This mechanism is similar to others except that the stored value is a JSON object defining the protocol, authority, shoulder, and, optionally, the separator for the PidProvider. 
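
Concretely, the pidGeneratorSpecs payload described here is a four-key JSON object. Sketch with illustrative values (10.5072 and FK2 are the usual DataCite test authority and shoulder):

    // What setPidGenerator(...) serializes, built with jakarta.json:
    String specs = jakarta.json.Json.createObjectBuilder()
            .add("protocol", "doi")
            .add("authority", "10.5072")
            .add("shoulder", "FK2")
            .add("separator", "/")
            .build().toString();
    // specs -> {"protocol":"doi","authority":"10.5072","shoulder":"FK2","separator":"/"}
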
+ */ + + public String getPidGeneratorSpecs() { + return pidGeneratorSpecs; + } + + public void setPidGeneratorSpecs(String pidGeneratorSpecs) { + this.pidGeneratorSpecs = pidGeneratorSpecs; + } + + // Used in JSF when selecting the PidGenerator + // It only returns an id if this dvObjectContainer has PidGenerator specs set on it, otherwise it returns "default" + public String getPidGeneratorId() { + if (StringUtils.isBlank(getPidGeneratorSpecs())) { + return "default"; + } else { + return getEffectivePidGenerator().getId(); + } + } + + //Used in JSF when setting the PidGenerator + public void setPidGeneratorId(String pidGeneratorId) { + // Note that the "default" provider will not be found so will result in + // setPidGenerator(null), which unsets the pidGenerator/Specs as desired + setPidGenerator(PidUtil.getPidProvider(pidGeneratorId)); + } + + public void setPidGenerator(PidProvider pidGenerator) { + this.pidGenerator = pidGenerator; + if (pidGenerator != null) { + JsonObjectBuilder job = jakarta.json.Json.createObjectBuilder(); + this.pidGeneratorSpecs = job.add("protocol", pidGenerator.getProtocol()) + .add("authority", pidGenerator.getAuthority()).add("shoulder", pidGenerator.getShoulder()) + .add("separator", pidGenerator.getSeparator()).build().toString(); + } else { + this.pidGeneratorSpecs = null; + } + } + + public PidProvider getEffectivePidGenerator() { + if (pidGenerator == null) { + String specs = getPidGeneratorSpecs(); + if (StringUtils.isBlank(specs)) { + GlobalId pid = getGlobalId(); + if ((pid != null) && PidUtil.getPidProvider(pid.getProviderId()).canCreatePidsLike(pid)) { + pidGenerator = PidUtil.getPidProvider(pid.getProviderId()); + } else { + if (getOwner() != null) { + pidGenerator = getOwner().getEffectivePidGenerator(); + } + } + } else { + JsonObject providerSpecs = JsonUtil.getJsonObject(specs); + if (providerSpecs.containsKey("separator")) { + pidGenerator = PidUtil.getPidProvider(providerSpecs.getString("protocol"), + providerSpecs.getString("authority"), providerSpecs.getString("shoulder"), + providerSpecs.getString("separator")); + } else { + pidGenerator = PidUtil.getPidProvider(providerSpecs.getString("protocol"), + providerSpecs.getString("authority"), providerSpecs.getString("shoulder")); + } + } + if(pidGenerator!=null && pidGenerator.canManagePID()) { + setPidGenerator(pidGenerator); + } else { + setPidGenerator(null); + } + } + return pidGenerator; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index d4219c36149..bd7fbeaff10 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -1,8 +1,9 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean; import edu.harvard.iq.dataverse.pidproviders.PidUtil; - import java.sql.Timestamp; import java.util.ArrayList; import java.util.Date; @@ -12,6 +13,8 @@ import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; + +import jakarta.ejb.EJB; import jakarta.ejb.Stateless; import jakarta.ejb.TransactionAttribute; import static jakarta.ejb.TransactionAttributeType.REQUIRES_NEW; @@ -38,6 +41,9 @@ public class DvObjectServiceBean implements java.io.Serializable { @PersistenceContext(unitName = "VDCNet-ejbPU") private 
EntityManager em; + @EJB + PidProviderFactoryBean pidProviderFactoryBean; + private static final Logger logger = Logger.getLogger(DvObjectServiceBean.class.getCanonicalName()); /** * @param dvoc The object we check @@ -389,4 +395,19 @@ public String generateNewIdentifierByStoredProcedure() { return (String) query.getOutputParameterValue(1); } + /** @deprecated Backward-compatibility method to get the effective pid generator for a DvObjectContainer. + * If the dvObjectContainer method fails, this method will check for the old global default settings. + * If/when those are no longer supported, this method can be removed and replaced with calls directly + * to dvObjectContainer.getEffectivePidGenerator(); + * + */ + @Deprecated(forRemoval = true, since = "2024-02-09") + public PidProvider getEffectivePidGenerator(DvObjectContainer dvObjectContainer) { + PidProvider pidGenerator = dvObjectContainer.getEffectivePidGenerator(); + if (pidGenerator == null) { + pidGenerator = pidProviderFactoryBean.getDefaultPidGenerator(); + } + return pidGenerator; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index a942830b19e..993cb02b66b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -2,7 +2,6 @@ import edu.harvard.iq.dataverse.provenance.ProvPopupFragmentBean; import edu.harvard.iq.dataverse.DataFile.ChecksumType; -import edu.harvard.iq.dataverse.DataFileServiceBean.UserStorageQuota; import edu.harvard.iq.dataverse.api.AbstractApiBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; @@ -38,6 +37,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -206,7 +206,7 @@ public enum Referrer { private final int NUMBER_OF_SCROLL_ROWS = 25; private DataFile singleFile = null; - private UserStorageQuota userStorageQuota = null; + private UploadSessionQuotaLimit uploadSessionQuota = null; public DataFile getSingleFile() { return singleFile; @@ -359,7 +359,7 @@ public String getHumanMaxTotalUploadSizeInBytes() { } public boolean isStorageQuotaEnforced() { - return userStorageQuota != null; + return uploadSessionQuota != null; } public Long getMaxIngestSizeInBytes() { @@ -530,8 +530,10 @@ public String initCreateMode(String modeToken, DatasetVersion version, MutableBo this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getEffectiveStorageDriverId()); if (systemConfig.isStorageQuotasEnforced()) { - this.userStorageQuota = datafileService.getUserStorageQuota((AuthenticatedUser) session.getUser(), dataset); - this.maxTotalUploadSizeInBytes = userStorageQuota.getRemainingQuotaInBytes(); + this.uploadSessionQuota = datafileService.getUploadSessionQuotaLimit(dataset); + if (this.uploadSessionQuota != null) { + this.maxTotalUploadSizeInBytes = uploadSessionQuota.getRemainingQuotaInBytes(); + } } else { this.maxTotalUploadSizeInBytes = null; } @@ -547,7 +549,7 @@ public String initCreateMode(String modeToken, DatasetVersion version, MutableBo } public boolean isQuotaExceeded() { - return 
systemConfig.isStorageQuotasEnforced() && userStorageQuota != null && userStorageQuota.getRemainingQuotaInBytes() == 0; + return systemConfig.isStorageQuotasEnforced() && uploadSessionQuota != null && uploadSessionQuota.getRemainingQuotaInBytes() == 0; } public String init() { @@ -592,8 +594,10 @@ public String init() { clone = workingVersion.cloneDatasetVersion(); this.maxFileUploadSizeInBytes = systemConfig.getMaxFileUploadSizeForStore(dataset.getEffectiveStorageDriverId()); if (systemConfig.isStorageQuotasEnforced()) { - this.userStorageQuota = datafileService.getUserStorageQuota((AuthenticatedUser) session.getUser(), dataset); - this.maxTotalUploadSizeInBytes = userStorageQuota.getRemainingQuotaInBytes(); + this.uploadSessionQuota = datafileService.getUploadSessionQuotaLimit(dataset); + if (this.uploadSessionQuota != null) { + this.maxTotalUploadSizeInBytes = uploadSessionQuota.getRemainingQuotaInBytes(); + } } this.maxIngestSizeInBytes = systemConfig.getTabularIngestSizeLimit(); this.humanPerFormatTabularLimits = populateHumanPerFormatTabularLimits(); @@ -1098,7 +1102,7 @@ public String save() { } // Try to save the NEW files permanently: - List filesAdded = ingestService.saveAndAddFilesToDataset(workingVersion, newFiles, null, true); + List filesAdded = ingestService.saveAndAddFilesToDataset(workingVersion, newFiles, null, true); // reset the working list of fileMetadatas, as to only include the ones // that have been added to the version successfully: @@ -1233,9 +1237,6 @@ public String save() { - We decided not to bother obtaining persistent ids for new files as they are uploaded and created. The identifiers will be assigned later, when the version is published. - - logger.info("starting async job for obtaining persistent ids for files."); - datasetService.obtainPersistentIdentifiersForDatafiles(dataset); */ } @@ -1529,7 +1530,7 @@ public void handleDropBoxUpload(ActionEvent event) { // zip file. //datafiles = ingestService.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream"); //CreateDataFileResult createDataFilesResult = FileUtil.createDataFiles(workingVersion, dropBoxStream, fileName, "application/octet-stream", null, null, systemConfig); - Command cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, dropBoxStream, fileName, "application/octet-stream", null, userStorageQuota, null); + Command cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, dropBoxStream, fileName, "application/octet-stream", null, uploadSessionQuota, null); CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd); datafiles = createDataFilesResult.getDataFiles(); Optional.ofNullable(editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult)).ifPresent(errorMessage -> errorMessages.add(errorMessage)); @@ -2068,9 +2069,9 @@ public void handleFileUpload(FileUploadEvent event) throws IOException { // dataset that does not yet exist in the database. 
We must // use the version of the Create New Files constructor that takes // the parent Dataverse as the extra argument: - cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, userStorageQuota, null, null, null, workingVersion.getDataset().getOwner()); + cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, uploadSessionQuota, null, null, null, workingVersion.getDataset().getOwner()); } else { - cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, userStorageQuota, null); + cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, uFile.getInputStream(), uFile.getFileName(), uFile.getContentType(), null, uploadSessionQuota, null); } CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd); @@ -2169,7 +2170,7 @@ public void handleExternalUpload() { - Max size specified in db: check too make sure file is within limits // ---------------------------- */ /** - * @todo: this size check is probably redundant here, since the new + * @todo: this file size limit check is now redundant here, since the new * CreateNewFilesCommand is going to perform it (and the quota * checks too, if enabled */ @@ -2208,7 +2209,7 @@ public void handleExternalUpload() { try { - Command cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, null, fileName, contentType, fullStorageIdentifier, userStorageQuota, checksumValue, checksumType, fileSize, parent); + Command cmd = new CreateNewDataFilesCommand(dvRequestService.getDataverseRequest(), workingVersion, null, fileName, contentType, fullStorageIdentifier, uploadSessionQuota, checksumValue, checksumType, fileSize, parent); CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd); datafiles = createDataFilesResult.getDataFiles(); Optional.ofNullable(editDataFilesPageHelper.getHtmlErrorMessage(createDataFilesResult)).ifPresent(errorMessage -> errorMessages.add(errorMessage)); diff --git a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java index bad8903c091..c8537f2a424 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java +++ b/src/main/java/edu/harvard/iq/dataverse/EjbDataverseEngine.java @@ -4,6 +4,7 @@ import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; +import edu.harvard.iq.dataverse.util.cache.CacheFactoryBean; import edu.harvard.iq.dataverse.engine.DataverseEngine; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.groups.GroupServiceBean; @@ -16,9 +17,9 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.engine.command.exception.RateLimitCommandException; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.pidproviders.FakePidProviderServiceBean; -import 
edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.search.IndexBatchServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; @@ -31,6 +32,7 @@ import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.ConstraintViolationUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -48,7 +50,6 @@ import static jakarta.ejb.TransactionAttributeType.SUPPORTS; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; -import jakarta.validation.ConstraintViolation; import jakarta.validation.ConstraintViolationException; /** @@ -113,20 +114,8 @@ public class EjbDataverseEngine { DataverseFieldTypeInputLevelServiceBean fieldTypeInputLevels; @EJB - DOIEZIdServiceBean doiEZId; - - @EJB - DOIDataCiteServiceBean doiDataCite; - - @EJB - FakePidProviderServiceBean fakePidProvider; + PidProviderFactoryBean pidProviderFactory; - @EJB - HandlenetServiceBean handleNet; - - @EJB - PermaLinkPidProviderServiceBean permaLinkProvider; - @EJB SettingsServiceBean settings; @@ -135,6 +124,9 @@ public class EjbDataverseEngine { @EJB GuestbookResponseServiceBean responses; + + @EJB + MetadataBlockServiceBean metadataBlockService; @EJB DataverseLinkingServiceBean dvLinking; @@ -142,6 +134,9 @@ public class EjbDataverseEngine { @EJB DatasetLinkingServiceBean dsLinking; + @EJB + DatasetFieldServiceBean dsField; + @EJB ExplicitGroupServiceBean explicitGroups; @@ -185,8 +180,13 @@ public class EjbDataverseEngine { ConfirmEmailServiceBean confirmEmailService; @EJB - EjbDataverseEngineInner innerEngine; + StorageUseServiceBean storageUseService; + @EJB + EjbDataverseEngineInner innerEngine; + + @EJB + CacheFactoryBean cacheFactory; @Resource EJBContext ejbCtxt; @@ -212,7 +212,11 @@ public R submit(Command aCommand) throws CommandException { try { logRec.setUserIdentifier( aCommand.getRequest().getUser().getIdentifier() ); - + // Check for rate limit exceeded. Must be done before anything else to prevent unnecessary processing. 
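
This engine-level gate mirrors the page-level one added to DataversePage earlier in this diff: both fail fast, before permissions are checked or a transaction is opened. The difference is in how the refusal surfaces: the page returns navigationWrapper.tooManyRequests(), while the engine throws, letting callers map the exception to HTTP 429. Condensed sketch of the exception side (not a literal excerpt; the surrounding API layer is assumed):

    try {
        engine.submit(command);
    } catch (RateLimitCommandException e) {
        // Hypothetical API layer translating the refusal to 429 Too Many Requests:
        return Response.status(429).entity(e.getMessage()).build();
    }
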
+ if (!cacheFactory.checkRate(aCommand.getRequest().getUser(), aCommand)) { + throw new RateLimitCommandException(BundleUtil.getStringFromBundle("command.exception.user.ratelimited", Arrays.asList(aCommand.getClass().getSimpleName())), aCommand); + } + // Check permissions - or throw an exception Map> requiredMap = aCommand.getRequiredPermissions(); if (requiredMap == null) { @@ -480,28 +484,8 @@ public DataverseFieldTypeInputLevelServiceBean fieldTypeInputLevels() { } @Override - public DOIEZIdServiceBean doiEZId() { - return doiEZId; - } - - @Override - public DOIDataCiteServiceBean doiDataCite() { - return doiDataCite; - } - - @Override - public FakePidProviderServiceBean fakePidProvider() { - return fakePidProvider; - } - - @Override - public HandlenetServiceBean handleNet() { - return handleNet; - } - - @Override - public PermaLinkPidProviderServiceBean permaLinkProvider() { - return permaLinkProvider; + public PidProviderFactoryBean pidProviderFactory() { + return pidProviderFactory; } @Override @@ -528,6 +512,17 @@ public DataverseLinkingServiceBean dvLinking() { public DatasetLinkingServiceBean dsLinking() { return dsLinking; } + + @Override + public DatasetFieldServiceBean dsField() { + return dsField; + } + + @Override + public StorageUseServiceBean storageUse() { + return storageUseService; + } + @Override public DataverseEngine engine() { return new DataverseEngine() { @@ -603,6 +598,11 @@ public ActionLogServiceBean actionLog() { return logSvc; } + @Override + public MetadataBlockServiceBean metadataBlocks() { + return metadataBlockService; + } + @Override public void beginCommandSequence() { this.commandsCalled = new Stack(); diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java index a6ae7223d9d..80cf3db8d53 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadHelper.java @@ -8,7 +8,10 @@ import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.externaltools.ExternalTool; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; @@ -53,6 +56,9 @@ public class FileDownloadHelper implements java.io.Serializable { @EJB DataFileServiceBean datafileService; + + @EJB + GlobusServiceBean globusService; private final Map fileDownloadPermissionMap = new HashMap<>(); // { FileMetadata.id : Boolean } @@ -60,32 +66,32 @@ public FileDownloadHelper() { this.filesForRequestAccess = new ArrayList<>(); } - // See also @Size(max = 255) in GuestbookResponse - private boolean testResponseLength(String value) { - return !(value != null && value.length() > 255); - } - // This helper method is called from the Download terms/guestbook/etc. popup, // when the user clicks the "ok" button. We use it, instead of calling // downloadServiceBean directly, in order to differentiate between single // file downloads and multiple (batch) downloads - since both use the same // terms/etc. popup. 
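
The rewrite of writeGuestbookAndStartDownload() in the hunk below threads a new isGlobusTransfer flag through the popup's OK action, so one popup now serves three outcomes. Condensed sketch of the dispatch:

    // After the guestbook/terms popup is confirmed:
    if (isGlobusTransfer) {
        globusService.writeGuestbookAndStartTransfer(guestbookResponse, true);
    } else if (guestbookResponse.getSelectedFileIds() != null) {
        fileDownloadService.writeGuestbookAndStartBatchDownload(guestbookResponse); // batch
    } else if (guestbookResponse.getDataFile() != null) {
        fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse);  // single
    }
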
- public void writeGuestbookAndStartDownload(GuestbookResponse guestbookResponse) { + public void writeGuestbookAndStartDownload(GuestbookResponse guestbookResponse, boolean isGlobusTransfer) { PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); // Note that this method is only ever called from the file-download-popup - // meaning we know for the fact that we DO want to save this // guestbookResponse permanently in the database. - if (guestbookResponse.getSelectedFileIds() != null) { - // this is a batch (multiple file) download. - // Although here's a chance that this is not really a batch download - i.e., - // there may only be one file on the file list. But the fileDownloadService - // method below will check for that, and will redirect to the single download, if - // that's the case. -- L.A. - fileDownloadService.writeGuestbookAndStartBatchDownload(guestbookResponse); - } else if (guestbookResponse.getDataFile() != null) { - // this a single file download: - fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse); + if(isGlobusTransfer) { + globusService.writeGuestbookAndStartTransfer(guestbookResponse, true); + } else { + if (guestbookResponse.getSelectedFileIds() != null) { + // this is a batch (multiple file) download. + // Although there's a chance that this is not really a batch download - i.e., + // there may only be one file on the file list. But the fileDownloadService + // method below will check for that, and will redirect to the single download, + // if + // that's the case. -- L.A. + fileDownloadService.writeGuestbookAndStartBatchDownload(guestbookResponse); + } else if (guestbookResponse.getDataFile() != null) { + // this is a single file download: + fileDownloadService.writeGuestbookAndStartFileDownload(guestbookResponse); + } } } @@ -217,7 +223,10 @@ public boolean canDownloadFile(FileMetadata fileMetadata){ // Always allow download for PrivateUrlUser return true; } - + + // Retention-expired files are always made unavailable, because they might be destroyed + if (FileUtil.isRetentionExpired(fileMetadata)) return false; + Long fid = fileMetadata.getId(); //logger.info("calling candownloadfile on filemetadata "+fid); // Note that `isRestricted` at the FileMetadata level is for expressing intent by version. Enforcement is done with `isRestricted` at the DataFile level. 
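The retention gate added above runs before any of the per-file permission lookups, mirroring the existing embargo check. A minimal sketch of the kind of date comparison a helper like FileUtil.isRetentionExpired() has to make (a hypothetical illustration, not the patch's actual implementation; the Retention accessors are inferred from their use in FilePage further down):

    // Hedged sketch: a retention period counts as expired once its end date is no longer in the future.
    public static boolean isRetentionExpired(FileMetadata fileMetadata) {
        Retention retention = fileMetadata.getDataFile().getRetention();
        if (retention == null || retention.getDateUnavailable() == null) {
            return false; // no retention period configured for this file
        }
        return !retention.getDateUnavailable().isAfter(java.time.LocalDate.now());
    }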
@@ -240,7 +249,9 @@ public boolean canDownloadFile(FileMetadata fileMetadata){ } } - if (!isRestrictedFile && !FileUtil.isActivelyEmbargoed(fileMetadata)){ + if (!isRestrictedFile + && !FileUtil.isActivelyEmbargoed(fileMetadata) + && !FileUtil.isRetentionExpired(fileMetadata)) { // Yes, save answer and return true this.fileDownloadPermissionMap.put(fid, true); return true; diff --git a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java index 55817d4a746..5370e9ac564 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileDownloadServiceBean.java @@ -4,7 +4,6 @@ import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -16,13 +15,13 @@ import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; -import edu.harvard.iq.dataverse.privateurl.PrivateUrl; -import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import java.io.IOException; import java.sql.Timestamp; import java.util.ArrayList; @@ -75,8 +74,6 @@ public class FileDownloadServiceBean implements java.io.Serializable { @EJB AuthenticationServiceBean authService; @EJB - PrivateUrlServiceBean privateUrlService; - @EJB SettingsServiceBean settingsService; @EJB MailServiceBean mailService; @@ -315,13 +312,19 @@ private void redirectToCustomZipDownloadService(String customZipServiceUrl, Stri } } - private void redirectToDownloadAPI(String downloadType, Long fileId, boolean guestBookRecordAlreadyWritten, Long fileMetadataId) { - String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, fileMetadataId); - logger.fine("Redirecting to file download url: " + fileDownloadUrl); - try { - FacesContext.getCurrentInstance().getExternalContext().redirect(fileDownloadUrl); - } catch (IOException ex) { - logger.info("Failed to issue a redirect to file download url (" + fileDownloadUrl + "): " + ex); + private void redirectToDownloadAPI(String downloadType, Long fileId, boolean guestBookRecordAlreadyWritten, + Long fileMetadataId) { + String fileDownloadUrl = FileUtil.getFileDownloadUrlPath(downloadType, fileId, guestBookRecordAlreadyWritten, + fileMetadataId); + if ("GlobusTransfer".equals(downloadType)) { + PrimeFaces.current().executeScript(URLTokenUtil.getScriptForUrl(fileDownloadUrl)); + } else { + logger.fine("Redirecting to file download url: " + fileDownloadUrl); + try { + FacesContext.getCurrentInstance().getExternalContext().redirect(fileDownloadUrl); + } catch (IOException ex) { + logger.info("Failed to issue a redirect to file download url (" + fileDownloadUrl + "): " + ex); 
+ } } } @@ -351,8 +354,9 @@ public void explore(GuestbookResponse guestbookResponse, FileMetadata fmd, Exter ApiToken apiToken = null; User user = session.getUser(); DatasetVersion version = fmd.getDatasetVersion(); - if (version.isDraft() || fmd.getDatasetVersion().isDeaccessioned() || (fmd.getDataFile().isRestricted()) || (FileUtil.isActivelyEmbargoed(fmd))) { - apiToken = getApiToken(user); + if (version.isDraft() || fmd.getDatasetVersion().isDeaccessioned() || (fmd.getDataFile().isRestricted()) + || (FileUtil.isActivelyEmbargoed(fmd)) || (FileUtil.isRetentionExpired(fmd))) { + apiToken = authService.getValidApiTokenForUser(user); } DataFile dataFile = null; if (fmd != null) { @@ -379,46 +383,26 @@ public void explore(GuestbookResponse guestbookResponse, FileMetadata fmd, Exter } } - public ApiToken getApiToken(User user) { - ApiToken apiToken = null; - if (user instanceof AuthenticatedUser) { - AuthenticatedUser authenticatedUser = (AuthenticatedUser) user; - apiToken = authService.findApiTokenByUser(authenticatedUser); - if (apiToken == null || apiToken.isExpired()) { - //No un-expired token - apiToken = authService.generateApiTokenForUser(authenticatedUser); - } - } else if (user instanceof PrivateUrlUser) { - PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; - PrivateUrl privateUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); - apiToken = new ApiToken(); - apiToken.setTokenString(privateUrl.getToken()); - } - return apiToken; - } - - public void downloadDatasetCitationXML(Dataset dataset) { - downloadCitationXML(null, dataset, false); + public void downloadDatasetCitationXML(DatasetVersion version) { + // DatasetVersion-level citation: + DataCitation citation=null; + citation = new DataCitation(version); + String fileNameString; + fileNameString = "attachment;filename=" + getFileNameFromPid(citation.getPersistentId()) + ".xml"; + downloadXML(citation, fileNameString); } public void downloadDatafileCitationXML(FileMetadata fileMetadata) { - downloadCitationXML(fileMetadata, null, false); + downloadCitationXML(fileMetadata, false); } public void downloadDirectDatafileCitationXML(FileMetadata fileMetadata) { - downloadCitationXML(fileMetadata, null, true); + downloadCitationXML(fileMetadata, true); } - public void downloadCitationXML(FileMetadata fileMetadata, Dataset dataset, boolean direct) { - DataCitation citation=null; - if (dataset != null){ - citation = new DataCitation(dataset.getLatestVersion()); - } else { - citation= new DataCitation(fileMetadata, direct); - } - FacesContext ctx = FacesContext.getCurrentInstance(); - HttpServletResponse response = (HttpServletResponse) ctx.getExternalContext().getResponse(); - response.setContentType("text/xml"); + public void downloadCitationXML(FileMetadata fileMetadata, boolean direct) { + DataCitation citation=null; + citation= new DataCitation(fileMetadata, direct); String fileNameString; if (fileMetadata == null || fileMetadata.getLabel() == null) { // Dataset-level citation: @@ -427,43 +411,46 @@ public void downloadCitationXML(FileMetadata fileMetadata, Dataset dataset, bool // Datafile-level citation: fileNameString = "attachment;filename=" + getFileNameFromPid(citation.getPersistentId()) + "-" + FileUtil.getCiteDataFileFilename(citation.getFileTitle(), FileUtil.FileCitationExtension.ENDNOTE); } + downloadXML(citation, fileNameString); + } + + public void downloadXML(DataCitation citation, String fileNameString) { + FacesContext ctx = FacesContext.getCurrentInstance(); + HttpServletResponse 
response = (HttpServletResponse) ctx.getExternalContext().getResponse(); + response.setContentType("text/xml"); response.setHeader("Content-Disposition", fileNameString); + try { ServletOutputStream out = response.getOutputStream(); citation.writeAsEndNoteCitation(out); out.flush(); ctx.responseComplete(); } catch (IOException e) { - } } - - public void downloadDatasetCitationRIS(Dataset dataset) { - downloadCitationRIS(null, dataset, false); + public void downloadDatasetCitationRIS(DatasetVersion version) { + // DatasetVersion-level citation: + DataCitation citation=null; + citation = new DataCitation(version); + String fileNameString; + fileNameString = "attachment;filename=" + getFileNameFromPid(citation.getPersistentId()) + ".ris"; + downloadRIS(citation, fileNameString); } public void downloadDatafileCitationRIS(FileMetadata fileMetadata) { - downloadCitationRIS(fileMetadata, null, false); + downloadCitationRIS(fileMetadata, false); } public void downloadDirectDatafileCitationRIS(FileMetadata fileMetadata) { - downloadCitationRIS(fileMetadata, null, true); + downloadCitationRIS(fileMetadata, true); } - public void downloadCitationRIS(FileMetadata fileMetadata, Dataset dataset, boolean direct) { - DataCitation citation=null; - if (dataset != null){ - citation = new DataCitation(dataset.getLatestVersion()); - } else { - citation= new DataCitation(fileMetadata, direct); - } - - FacesContext ctx = FacesContext.getCurrentInstance(); - HttpServletResponse response = (HttpServletResponse) ctx.getExternalContext().getResponse(); - response.setContentType("application/download"); - + public void downloadCitationRIS(FileMetadata fileMetadata, boolean direct) { + DataCitation citation=null; + citation= new DataCitation(fileMetadata, direct); + String fileNameString; if (fileMetadata == null || fileMetadata.getLabel() == null) { // Dataset-level citation: @@ -472,6 +459,14 @@ public void downloadCitationRIS(FileMetadata fileMetadata, Dataset dataset, bool // Datafile-level citation: fileNameString = "attachment;filename=" + getFileNameFromPid(citation.getPersistentId()) + "-" + FileUtil.getCiteDataFileFilename(citation.getFileTitle(), FileUtil.FileCitationExtension.RIS); } + downloadRIS(citation, fileNameString); + } + + public void downloadRIS(DataCitation citation, String fileNameString) { + //SEK 12/3/2018 changing this to open the json in a new tab. 
+ FacesContext ctx = FacesContext.getCurrentInstance(); + HttpServletResponse response = (HttpServletResponse) ctx.getExternalContext().getResponse(); + response.setContentType("application/download"); response.setHeader("Content-Disposition", fileNameString); try { @@ -483,38 +478,33 @@ public void downloadCitationRIS(FileMetadata fileMetadata, Dataset dataset, bool } } - + private String getFileNameFromPid(GlobalId id) { return id.asString(); } - public void downloadDatasetCitationBibtex(Dataset dataset) { - downloadCitationBibtex(null, dataset, false); + public void downloadDatasetCitationBibtex(DatasetVersion version) { + // DatasetVersion-level citation: + DataCitation citation=null; + citation = new DataCitation(version); + String fileNameString; + fileNameString = "inline;filename=" + getFileNameFromPid(citation.getPersistentId()) + ".bib"; + downloadBibtex(citation, fileNameString); } public void downloadDatafileCitationBibtex(FileMetadata fileMetadata) { - downloadCitationBibtex(fileMetadata, null, false); + downloadCitationBibtex(fileMetadata, false); } public void downloadDirectDatafileCitationBibtex(FileMetadata fileMetadata) { - downloadCitationBibtex(fileMetadata, null, true); + downloadCitationBibtex(fileMetadata, true); } - public void downloadCitationBibtex(FileMetadata fileMetadata, Dataset dataset, boolean direct) { - DataCitation citation=null; - if (dataset != null){ - citation = new DataCitation(dataset.getLatestVersion()); - } else { - citation= new DataCitation(fileMetadata, direct); - } - //SEK 12/3/2018 changing this to open the json in a new tab. - FacesContext ctx = FacesContext.getCurrentInstance(); - HttpServletResponse response = (HttpServletResponse) ctx.getExternalContext().getResponse(); - - //Fix for 6029 FireFox was failing to parse it when content type was set to json - response.setContentType("text/plain"); + public void downloadCitationBibtex(FileMetadata fileMetadata, boolean direct) { + DataCitation citation=null; + citation= new DataCitation(fileMetadata, direct); String fileNameString; if (fileMetadata == null || fileMetadata.getLabel() == null) { @@ -524,6 +514,16 @@ public void downloadCitationBibtex(FileMetadata fileMetadata, Dataset dataset, b // Datafile-level citation: fileNameString = "inline;filename=" + getFileNameFromPid(citation.getPersistentId()) + "-" + FileUtil.getCiteDataFileFilename(citation.getFileTitle(), FileUtil.FileCitationExtension.BIBTEX); } + downloadBibtex(citation, fileNameString); + } + + public void downloadBibtex(DataCitation citation, String fileNameString) { + //SEK 12/3/2018 changing this to open the json in a new tab. 
+ FacesContext ctx = FacesContext.getCurrentInstance(); + HttpServletResponse response = (HttpServletResponse) ctx.getExternalContext().getResponse(); + + //Fix for 6029 FireFox was failing to parse it when content type was set to json + response.setContentType("text/plain"); response.setHeader("Content-Disposition", fileNameString); try { @@ -573,7 +573,7 @@ public boolean requestAccess(DataFile dataFile, GuestbookResponse gbr){ public void sendRequestFileAccessNotification(Dataset dataset, Long fileId, AuthenticatedUser requestor) { Timestamp ts = new Timestamp(new Date().getTime()); - permissionService.getUsersWithPermissionOn(Permission.ManageDatasetPermissions, dataset).stream().forEach((au) -> { + permissionService.getUsersWithPermissionOn(Permission.ManageFilePermissions, dataset).stream().forEach((au) -> { userNotificationService.sendNotification(au, ts, UserNotification.Type.REQUESTFILEACCESS, fileId, null, requestor, true); }); //send the user that requested access a notification that they requested the access diff --git a/src/main/java/edu/harvard/iq/dataverse/FilePage.java b/src/main/java/edu/harvard/iq/dataverse/FilePage.java index bfae80ade27..afede00f3eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FilePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/FilePage.java @@ -21,6 +21,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDatasetCommand; import edu.harvard.iq.dataverse.engine.command.impl.PersistProvFreeFormCommand; import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand; +import edu.harvard.iq.dataverse.engine.command.impl.UningestFileCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.export.ExportService; import io.gdcc.spi.export.ExportException; @@ -28,24 +29,33 @@ import edu.harvard.iq.dataverse.externaltools.ExternalTool; import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; +import edu.harvard.iq.dataverse.ingest.IngestRequest; +import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; +import edu.harvard.iq.dataverse.util.StringUtil; + import static edu.harvard.iq.dataverse.util.JsfHelper.JH; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; +import java.time.LocalDate; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Set; +import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; + import jakarta.ejb.EJB; import jakarta.ejb.EJBException; import jakarta.faces.application.FacesMessage; @@ -112,10 +122,10 @@ public class FilePage implements java.io.Serializable { GuestbookResponseServiceBean guestbookResponseService; @EJB AuthenticationServiceBean authService; - @EJB DatasetServiceBean datasetService; - + @EJB + IngestServiceBean ingestService; @EJB SystemConfig systemConfig; @@ -144,6 +154,9 @@ public 
class FilePage implements java.io.Serializable { @Inject EmbargoServiceBean embargoService; + @Inject + RetentionServiceBean retentionService; + private static final Logger logger = Logger.getLogger(FilePage.class.getCanonicalName()); private boolean fileDeleteInProgress = false; @@ -209,7 +222,7 @@ public String init() { // If this DatasetVersion is unpublished and permission is doesn't have permissions: // > Go to the Login page // - // Check permisisons + // Check permissions Boolean authorized = (fileMetadata.getDatasetVersion().isReleased()) || (!fileMetadata.getDatasetVersion().isReleased() && this.canViewUnpublishedDataset()); @@ -238,12 +251,10 @@ public String init() { if (file.isTabularData()) { contentType=DataFileServiceBean.MIME_TYPE_TSV_ALT; } - configureTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.CONFIGURE, contentType); - exploreTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.EXPLORE, contentType); - queryTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.QUERY, contentType); - Collections.sort(exploreTools, CompareExternalToolName); - toolsWithPreviews = sortExternalTools(); - + loadExternalTools(); + + + if (toolType != null) { if (toolType.equals("PREVIEW")) { if (!toolsWithPreviews.isEmpty()) { @@ -271,11 +282,32 @@ public String init() { if(!hasValidTermsOfAccess && canUpdateDataset() ){ JsfHelper.addWarningMessage(BundleUtil.getStringFromBundle("dataset.message.editMetadata.invalid.TOUA.message")); } - + + LocalDate minRetentiondate = settingsWrapper.getMinRetentionDate(); + if (minRetentiondate != null){ + selectionRetention.setDateUnavailable(minRetentiondate.plusDays(1L)); + } + displayPublishMessage(); return null; } + private void loadExternalTools() { + String contentType= file.getContentType(); + configureTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.CONFIGURE, contentType); + exploreTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.EXPLORE, contentType); + queryTools = externalToolService.findFileToolsByTypeAndContentType(ExternalTool.Type.QUERY, contentType); + Collections.sort(exploreTools, CompareExternalToolName); + toolsWithPreviews = sortExternalTools(); + //For inaccessible files, only show the tools that have access to aux files (which are currently always accessible) + if(!StorageIO.isDataverseAccessible(DataAccess.getStorageDriverFromIdentifier(file.getStorageIdentifier()))) { + configureTools = configureTools.stream().filter(tool ->tool.accessesAuxFiles()).collect(Collectors.toList()); + exploreTools = exploreTools.stream().filter(tool ->tool.accessesAuxFiles()).collect(Collectors.toList()); + queryTools = queryTools.stream().filter(tool ->tool.accessesAuxFiles()).collect(Collectors.toList()); + toolsWithPreviews = toolsWithPreviews.stream().filter(tool ->tool.accessesAuxFiles()).collect(Collectors.toList()); + } + } + private void displayPublishMessage(){ if (fileMetadata.getDatasetVersion().isDraft() && canUpdateDataset() && (canPublishDataset() || !fileMetadata.getDatasetVersion().getDataset().isLockedFor(DatasetLock.Reason.InReview))){ @@ -283,13 +315,18 @@ private void displayPublishMessage(){ } } + Boolean valid = null; + public boolean isValid() { - if (!fileMetadata.getDatasetVersion().isDraft()) { - return true; + if (valid == null) { + final DatasetVersion workingVersion = fileMetadata.getDatasetVersion(); + if (workingVersion.isDraft() || (canUpdateDataset() && 
JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(true))) { + valid = workingVersion.isValid(); + } else { + valid = true; + } } - DatasetVersion newVersion = fileMetadata.getDatasetVersion().cloneDatasetVersion(); - newVersion.setDatasetFields(newVersion.initDatasetFields()); - return newVersion.isValid(); + return valid; } private boolean canViewUnpublishedDataset() { @@ -475,6 +512,119 @@ public String restrictFile(boolean restricted) throws CommandException{ return returnToDraftVersion(); } + public String ingestFile() throws CommandException{ + + User u = session.getUser(); + if(!u.isAuthenticated() || !u.isSuperuser()) { + //Shouldn't happen (choice not displayed for users who don't have the right permission), but check anyway + logger.warning("User: " + u.getIdentifier() + " tried to ingest a file"); + JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("file.ingest.cantIngestFileWarning")); + return null; + } + + editDataset = file.getOwner(); + + if (file.isTabularData()) { + JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("file.ingest.alreadyIngestedWarning")); + return null; + } + + boolean ingestLock = dataset.isLockedFor(DatasetLock.Reason.Ingest); + + if (ingestLock) { + JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("file.ingest.ingestInProgressWarning")); + return null; + } + + if (!FileUtil.canIngestAsTabular(file)) { + JH.addMessage(FacesMessage.SEVERITY_WARN, BundleUtil.getStringFromBundle("file.ingest.cantIngestFileWarning")); + return null; + + } + + file.SetIngestScheduled(); + + if (file.getIngestRequest() == null) { + file.setIngestRequest(new IngestRequest(file)); + } + + file.getIngestRequest().setForceTypeCheck(true); + + // update the datafile, to save the new ingest request in the database: + datafileService.save(file); + + // queue the data ingest job for asynchronous execution: + String status = ingestService.startIngestJobs(editDataset.getId(), new ArrayList<>(Arrays.asList(file)), (AuthenticatedUser) session.getUser()); + + if (!StringUtil.isEmpty(status)) { + // This most likely indicates some sort of a problem (for example, + // the ingest job was not put on the JMS queue because of the size + // of the file). But we are still returning the OK status - because + // from the point of view of the API, it's a success - we have + // successfully gone through the process of trying to schedule the + // ingest job... 
+ + logger.warning("Ingest Status for file: " + file.getId() + " : " + status); + } + logger.fine("File: " + file.getId() + " ingest queued"); + + init(); + JsfHelper.addInfoMessage(BundleUtil.getStringFromBundle("file.ingest.ingestQueued")); + return returnToDraftVersion(); + } + + public String uningestFile() throws CommandException { + + if (!file.isTabularData()) { + //Ingest never succeeded, either there was a failure or this is not a tabular data file + if (file.isIngestProblem()) { + //We allow anyone who can publish to uningest in order to clear a problem + User u = session.getUser(); + if (!u.isAuthenticated() || !(permissionService.permissionsFor(u, file).contains(Permission.PublishDataset))) { + logger.warning("User: " + u.getIdentifier() + " tried to uningest a file"); + // Shouldn't happen (choice not displayed for users who don't have the right + // permission), but check anyway + JH.addMessage(FacesMessage.SEVERITY_WARN, + BundleUtil.getStringFromBundle("file.ingest.cantUningestFileWarning")); + return null; + } + file.setIngestDone(); + file.setIngestReport(null); + } else { + //Shouldn't happen - got called when there is no tabular data or an ingest problem + JH.addMessage(FacesMessage.SEVERITY_WARN, + BundleUtil.getStringFromBundle("file.ingest.cantUningestFileWarning")); + return null; + } + } else { + //Superuser required to uningest after a success + //Uningest command does it's own check for isSuperuser + commandEngine.submit(new UningestFileCommand(dvRequestService.getDataverseRequest(), file)); + Long dataFileId = file.getId(); + file = datafileService.find(dataFileId); + } + editDataset = file.getOwner(); + if (editDataset.isReleased()) { + try { + ExportService instance = ExportService.getInstance(); + instance.exportAllFormats(editDataset); + + } catch (ExportException ex) { + // Something went wrong! + // Just like with indexing, a failure to export is not a fatal + // condition. We'll just log the error as a warning and keep + // going: + logger.log(Level.WARNING, "Uningest: Exception while exporting:{0}", ex.getMessage()); + } + } + datafileService.save(file); + + // Refresh filemetadata with file title, etc. 
+ init(); + JH.addMessage(FacesMessage.SEVERITY_INFO, BundleUtil.getStringFromBundle("file.uningest.complete")); + return returnToDraftVersion(); + } + private List filesToBeDeleted = new ArrayList<>(); public String deleteFile() { @@ -948,6 +1098,12 @@ public boolean isPubliclyDownloadable() { return FileUtil.isPubliclyDownloadable(fileMetadata); } + public boolean isIngestable() { + DataFile f = fileMetadata.getDataFile(); + //Datafile is an ingestable type and hasn't been ingested yet or had an ingest fail + return (FileUtil.canIngestAsTabular(f)&&!(f.isTabularData() || f.isIngestProblem())); + } + private Boolean lockedFromEditsVar; private Boolean lockedFromDownloadVar; @@ -1069,7 +1225,7 @@ public String preview(ExternalTool externalTool) { ApiToken apiToken = null; User user = session.getUser(); if (fileMetadata.getDatasetVersion().isDraft() || fileMetadata.getDatasetVersion().isDeaccessioned() || (fileMetadata.getDataFile().isRestricted()) || (FileUtil.isActivelyEmbargoed(fileMetadata))) { - apiToken=fileDownloadService.getApiToken(user); + apiToken=authService.getValidApiTokenForUser(user); } if(externalTool == null){ return ""; @@ -1247,7 +1403,129 @@ public String getEmbargoPhrase() { return BundleUtil.getStringFromBundle("embargoed.willbeuntil"); } } - + + public boolean isValidRetentionSelection() { + if (!fileMetadata.getDataFile().isReleased()) { + return true; + } + return false; + } + + public boolean isExistingRetention() { + if (!fileMetadata.getDataFile().isReleased() && (fileMetadata.getDataFile().getRetention() != null)) { + return true; + } + return false; + } + + public boolean isRetentionForWholeSelection() { + return isValidRetentionSelection(); + } + + public Retention getSelectionRetention() { + return selectionRetention; + } + + public void setSelectionRetention(Retention selectionRetention) { + this.selectionRetention = selectionRetention; + } + + private Retention selectionRetention = new Retention(); + + private boolean removeRetention=false; + + public boolean isRemoveRetention() { + return removeRetention; + } + + public void setRemoveRetention(boolean removeRetention) { + boolean existing = this.removeRetention; + this.removeRetention = removeRetention; + if (existing != this.removeRetention) { + logger.info("State flip"); + selectionRetention = new Retention(); + if (removeRetention) { + selectionRetention = new Retention(null, null); + } + } + PrimeFaces.current().resetInputs("fileForm:retentionInputs"); + } + + public String saveRetention() { + + if(isRemoveRetention() || (selectionRetention.getDateUnavailable()==null && selectionRetention.getReason()==null)) { + selectionRetention=null; + } + + Retention ret = null; + // Note: this.fileMetadata.getDataFile() is not the same object as this.file. + // (Not sure there's a good reason for this other than that's the way it is.) + // So changes to this.fileMetadata.getDataFile() will not be saved with + // editDataset = this.file.getOwner() set as it is below. 
+ if (!file.isReleased()) { + ret = file.getRetention(); + if (ret != null) { + logger.fine("Before: " + ret.getDataFiles().size()); + ret.getDataFiles().remove(fileMetadata.getDataFile()); + logger.fine("After: " + ret.getDataFiles().size()); + } + if (selectionRetention != null) { + retentionService.merge(selectionRetention); + } + file.setRetention(selectionRetention); + if (ret != null && !ret.getDataFiles().isEmpty()) { + ret = null; + } + } + if(selectionRetention!=null) { + retentionService.save(selectionRetention, ((AuthenticatedUser)session.getUser()).getIdentifier()); + } + // success message: + String successMessage = BundleUtil.getStringFromBundle("file.assignedRetention.success"); + logger.fine(successMessage); + successMessage = successMessage.replace("{0}", "Selected Files"); + JsfHelper.addFlashMessage(successMessage); + selectionRetention = new Retention(); + + //Caller has to set editDataset before calling save() + editDataset = this.file.getOwner(); + + save(); + init(); + if(ret!=null) { + retentionService.delete(ret,((AuthenticatedUser)session.getUser()).getIdentifier()); + } + return returnToDraftVersion(); + } + + public void clearRetentionPopup() { + setRemoveRetention(false); + selectionRetention = new Retention(); + PrimeFaces.current().resetInputs("fileForm:retentionInputs"); + } + + public void clearSelectionRetention() { + selectionRetention = new Retention(); + PrimeFaces.current().resetInputs("fileForm:retentionInputs"); + } + + public boolean isCantRequestDueToRetention() { + return FileUtil.isRetentionExpired(fileMetadata); + } + + public String getRetentionPhrase() { + //Should only be getting called when there is a retention + if(file.isReleased()) { + if(FileUtil.isRetentionExpired(file)) { + return BundleUtil.getStringFromBundle("retention.after"); + } else { + return BundleUtil.getStringFromBundle("retention.isfrom"); + } + } else { + return BundleUtil.getStringFromBundle("retention.willbeafter"); + } + } + public String getToolTabTitle(){ if (getAllAvailableTools().size() > 1) { return BundleUtil.getStringFromBundle("file.toolTab.header"); diff --git a/src/main/java/edu/harvard/iq/dataverse/FileSearchCriteria.java b/src/main/java/edu/harvard/iq/dataverse/FileSearchCriteria.java index 62f10c18bdf..e3ed507a9c2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/FileSearchCriteria.java +++ b/src/main/java/edu/harvard/iq/dataverse/FileSearchCriteria.java @@ -12,7 +12,7 @@ public class FileSearchCriteria { * Status of the particular DataFile based on active embargoes and restriction state */ public enum FileAccessStatus { - Public, Restricted, EmbargoedThenRestricted, EmbargoedThenPublic + Public, Restricted, EmbargoedThenRestricted, EmbargoedThenPublic, RetentionPeriodExpired } public FileSearchCriteria(String contentType, FileAccessStatus accessStatus, String categoryName, String tabularTagName, String searchText) { diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index 890b146a61c..a542cb52ac0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -6,7 +6,7 @@ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; import edu.harvard.iq.dataverse.util.BundleUtil; import static edu.harvard.iq.dataverse.util.StringUtil.isEmpty; import java.net.MalformedURLException; @@ -33,7 +33,7 @@ 
public GlobalId(String protocol, String authority, String identifier, String sep this.separator = separator; } this.urlPrefix = urlPrefix; - this.managingProviderName = providerName; + this.managingProviderId = providerName; } // protocol the identifier system, e.g. "doi" @@ -42,7 +42,7 @@ public GlobalId(String protocol, String authority, String identifier, String sep private String protocol; private String authority; private String identifier; - private String managingProviderName; + private String managingProviderId; private String separator = "/"; private String urlPrefix; @@ -67,8 +67,8 @@ public String getIdentifier() { return identifier; } - public String getProvider() { - return managingProviderName; + public String getProviderId() { + return managingProviderId; } public String toString() { diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java index 9fb584a9133..f54b1fb6117 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookPage.java @@ -288,19 +288,21 @@ public String save() { Command cmd; try { + // Per recent #dv-tech conversation w/ Jim - copying the code + // below from his QDR branch; the code that used to be here called + // UpdateDataverseCommand when saving new guestbooks, and that involved + // an unnecessary reindexing of the dataverse (and, in some cases, + // reindexing of the underlying datasets). - L.A. if (editMode == EditMode.CREATE || editMode == EditMode.CLONE ) { guestbook.setCreateTime(new Timestamp(new Date().getTime())); - guestbook.setUsageCount(new Long(0)); + guestbook.setUsageCount(Long.valueOf(0)); guestbook.setEnabled(true); dataverse.getGuestbooks().add(guestbook); - cmd = new UpdateDataverseCommand(dataverse, null, null, dvRequestService.getDataverseRequest(), null); - commandEngine.submit(cmd); create = true; - } else { - cmd = new UpdateDataverseGuestbookCommand(dataverse, guestbook, dvRequestService.getDataverseRequest()); - commandEngine.submit(cmd); - } - + } + cmd = new UpdateDataverseGuestbookCommand(dataverse, guestbook, dvRequestService.getDataverseRequest()); + commandEngine.submit(cmd); + } catch (EJBException ex) { StringBuilder error = new StringBuilder(); error.append(ex).append(" "); diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java index 976f1e084ac..1ea7d02791d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponse.java @@ -17,6 +17,8 @@ import java.util.List; import jakarta.persistence.*; import jakarta.validation.constraints.Size; +import java.util.Collections; +import java.util.Comparator; /** * @@ -99,7 +101,7 @@ public class GuestbookResponse implements Serializable { */ public static final String ACCESS_REQUEST = "AccessRequest"; - static final String DOWNLOAD = "Download"; + public static final String DOWNLOAD = "Download"; static final String SUBSET = "Subset"; static final String EXPLORE = "Explore"; @@ -178,7 +180,7 @@ public GuestbookResponse(GuestbookResponse source){ this.setSessionId(source.getSessionId()); List customQuestionResponses = new ArrayList<>(); if (!source.getCustomQuestionResponses().isEmpty()){ - for (CustomQuestionResponse customQuestionResponse : source.getCustomQuestionResponses() ){ + for (CustomQuestionResponse customQuestionResponse : source.getCustomQuestionResponsesSorted() ){ 
CustomQuestionResponse customQuestionResponseAdd = new CustomQuestionResponse(); customQuestionResponseAdd.setResponse(customQuestionResponse.getResponse()); customQuestionResponseAdd.setCustomQuestion(customQuestionResponse.getCustomQuestion()); @@ -254,6 +256,18 @@ public String getResponseDate() { public List<CustomQuestionResponse> getCustomQuestionResponses() { return customQuestionResponses; } + + public List<CustomQuestionResponse> getCustomQuestionResponsesSorted(){ + + Collections.sort(customQuestionResponses, (CustomQuestionResponse cqr1, CustomQuestionResponse cqr2) -> { + int a = cqr1.getCustomQuestion().getDisplayOrder(); + int b = cqr2.getCustomQuestion().getDisplayOrder(); + return Integer.valueOf(a).compareTo(b); + }); + + + return customQuestionResponses; + } public void setCustomQuestionResponses(List<CustomQuestionResponse> customQuestionResponses) { this.customQuestionResponses = customQuestionResponses; @@ -317,7 +331,11 @@ public void setSessionId(String sessionId) { this.sessionId= sessionId; } - public String toHtmlFormattedResponse() { + public String toHtmlFormattedResponse(){ + return toHtmlFormattedResponse(null); + } + + public String toHtmlFormattedResponse(AuthenticatedUser requestor) { StringBuilder sb = new StringBuilder(); @@ -326,17 +344,25 @@ public String toHtmlFormattedResponse() { sb.append(BundleUtil.getStringFromBundle("dataset.guestbookResponse.respondent") + "<ul>\n<li>" + BundleUtil.getStringFromBundle("name") + ": " + getName() + "</li>\n<li>"); sb.append(" " + BundleUtil.getStringFromBundle("email") + ": " + getEmail() + "</li>\n<li>"); - sb.append( - " " + BundleUtil.getStringFromBundle("institution") + ": " + wrapNullAnswer(getInstitution()) + "</li>\n<li>"); - sb.append(" " + BundleUtil.getStringFromBundle("position") + ": " + wrapNullAnswer(getPosition()) + "</li></ul>\n"); + sb.append(" " + BundleUtil.getStringFromBundle("institution") + ": " + wrapNullAnswer(getInstitution()) + "</li>\n<li>"); + sb.append(" " + BundleUtil.getStringFromBundle("position") + ": " + wrapNullAnswer(getPosition()) + "</li>"); + + //Add requestor information to response to help dataset admin with request processing + if (requestor != null){ + sb.append("\n<li>" + BundleUtil.getStringFromBundle("dataset.guestbookResponse.requestor.id") + ": " + requestor.getId()+ "</li>"); + sb.append("\n<li>" + BundleUtil.getStringFromBundle("dataset.guestbookResponse.requestor.identifier") + ": " + requestor.getIdentifier()+ "</li>\n</ul>"); + } else { + sb.append("\n</ul>"); + } + sb.append(BundleUtil.getStringFromBundle("dataset.guestbookResponse.guestbook.additionalQuestions") + ":<ul>\n"); - for (CustomQuestionResponse cqr : getCustomQuestionResponses()) { + for (CustomQuestionResponse cqr : getCustomQuestionResponsesSorted()) { sb.append("<li>" + BundleUtil.getStringFromBundle("dataset.guestbookResponse.question") + ": " + cqr.getCustomQuestion().getQuestionString() + "<br>" + BundleUtil.getStringFromBundle("dataset.guestbookResponse.answer") + ": " - + wrapNullAnswer(cqr.getResponse()) + "</li>\n"); + + wrapNullAnswer(cqr.getResponse()) + "\n</li>"); } sb.append("</ul>"); return sb.toString();
diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java index b0cc41eb448..6c043b78941 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponseServiceBean.java @@ -432,7 +432,7 @@ public Long findCountByGuestbookId(Long guestbookId, Long dataverseId) { Query query = em.createNativeQuery(queryString); return (Long) query.getSingleResult(); } else { - String queryString = "select count(o) from GuestbookResponse as o, Dataset d, DvObject obj where o.dataset_id = d.id and d.id = obj.id and obj.owner_id = " + dataverseId + "and o.guestbook_id = " + guestbookId; + String queryString = "select count(o) from GuestbookResponse as o, Dataset d, DvObject obj where o.dataset_id = d.id and d.id = obj.id and obj.owner_id = " + dataverseId + " and o.guestbook_id = " + guestbookId; Query query = em.createNativeQuery(queryString); return (Long) query.getSingleResult(); } @@ -914,7 +914,7 @@ public void save(GuestbookResponse guestbookResponse) { public Long getDownloadCountByDataFileId(Long dataFileId) { // datafile id is null, will return 0 - Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.datafile_id = " + dataFileId + "and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); + Query query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.datafile_id = " + dataFileId + " and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); return (Long) query.getSingleResult(); } @@ -928,7 +928,7 @@ public Long getDownloadCountByDatasetId(Long datasetId, LocalDate date) { if(date != null) { query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId + " and responsetime < '" + date.toString() + "' and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); }else { - query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId+ "and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); + query = em.createNativeQuery("select count(o.id) from GuestbookResponse o where o.dataset_id = " + datasetId+ " and eventtype != '" + GuestbookResponse.ACCESS_REQUEST +"'"); } return (Long) query.getSingleResult(); }
diff --git a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponsesPage.java b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponsesPage.java index c53df93def8..4276eb02882 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GuestbookResponsesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/GuestbookResponsesPage.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseCommand; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.util.List; import java.util.logging.Logger; @@ -101,8 +102,9 @@ public String init() { private String getFileName(){ // The fix below replaces any spaces in the name of the dataverse with underscores; // without it, the filename was chopped off (by the browser??), and the user // was getting the file name "Foo", instead of "Foo and Bar in Social Sciences.csv". -- L.A. 
- return dataverse.getName().replace(' ', '_') + "_" + guestbook.getId() + "_GuestbookReponses.csv"; + // was getting the file name "Foo", instead of "Foo and Bar in Social Sciences.csv". -- L.A. + // Also removing some chars that have been reported to cause issues with certain browsers + return FileUtil.sanitizeFileName(dataverse.getName() + "_" + guestbook.getId() + "_GuestbookResponses.csv"); } public void streamResponsesByDataverseAndGuestbook(){ diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java index 6dbba34920b..0b66b652e0c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingSetsPage.java @@ -30,6 +30,8 @@ import jakarta.faces.view.ViewScoped; import jakarta.inject.Inject; import jakarta.inject.Named; +import java.util.HashMap; +import java.util.Map; import org.apache.commons.lang3.StringUtils; /** @@ -430,44 +432,92 @@ public boolean isSessionUserAuthenticated() { return false; } + // The numbers of datasets and deleted/exported records below are used + // in rendering rules on the page. They absolutely need to be cached + // on the first lookup. + + Map cachedSetInfoNumDatasets = new HashMap<>(); + public int getSetInfoNumOfDatasets(OAISet oaiSet) { if (oaiSet.isDefaultSet()) { return getSetInfoNumOfExported(oaiSet); } + if (cachedSetInfoNumDatasets.get(oaiSet.getSpec()) != null) { + return cachedSetInfoNumDatasets.get(oaiSet.getSpec()); + } + String query = oaiSet.getDefinition(); try { int num = oaiSetService.validateDefinitionQuery(query); if (num > -1) { + cachedSetInfoNumDatasets.put(oaiSet.getSpec(), num); return num; } } catch (OaiSetException ose) { - // do notghin - will return zero. + // do nothing - will return zero. 
} + cachedSetInfoNumDatasets.put(oaiSet.getSpec(), 0); return 0; } + Map cachedSetInfoNumExported = new HashMap<>(); + Integer defaultSetNumExported = null; + public int getSetInfoNumOfExported(OAISet oaiSet) { + if (oaiSet.isDefaultSet() && defaultSetNumExported != null) { + return defaultSetNumExported; + } else if (cachedSetInfoNumExported.get(oaiSet.getSpec()) != null) { + return cachedSetInfoNumExported.get(oaiSet.getSpec()); + } + List records = oaiRecordService.findActiveOaiRecordsBySetName(oaiSet.getSpec()); + int num; + if (records == null || records.isEmpty()) { - return 0; + num = 0; + } else { + num = records.size(); } - return records.size(); + if (oaiSet.isDefaultSet()) { + defaultSetNumExported = num; + } else { + cachedSetInfoNumExported.put(oaiSet.getSpec(), num); + } + return num; } + Map cachedSetInfoNumDeleted = new HashMap<>(); + Integer defaultSetNumDeleted = null; + public int getSetInfoNumOfDeleted(OAISet oaiSet) { + if (oaiSet.isDefaultSet() && defaultSetNumDeleted != null) { + return defaultSetNumDeleted; + } else if (cachedSetInfoNumDeleted.get(oaiSet.getSpec()) != null) { + return cachedSetInfoNumDeleted.get(oaiSet.getSpec()); + } + List records = oaiRecordService.findDeletedOaiRecordsBySetName(oaiSet.getSpec()); + int num; + if (records == null || records.isEmpty()) { - return 0; + num = 0; + } else { + num = records.size(); } - return records.size(); + if (oaiSet.isDefaultSet()) { + defaultSetNumDeleted = num; + } else { + cachedSetInfoNumDeleted.put(oaiSet.getSpec(), num); + } + return num; } public void validateSetQuery() { diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index 72fc6ee6d64..7359ef8eb33 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -11,6 +11,7 @@ import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailServiceBean; import edu.harvard.iq.dataverse.dataset.DatasetUtil; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -24,11 +25,15 @@ import java.util.Arrays; import java.util.Date; import java.util.List; +import java.util.Objects; +import java.util.Optional; import java.util.Set; +import java.util.logging.Level; import java.util.logging.Logger; -import jakarta.annotation.Resource; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; +import jakarta.inject.Inject; +import jakarta.inject.Named; import jakarta.mail.Address; import jakarta.mail.Message; import jakarta.mail.MessagingException; @@ -78,77 +83,133 @@ public class MailServiceBean implements java.io.Serializable { */ public MailServiceBean() { } + + /** + * Creates a new instance of MailServiceBean with explicit injection, as used during testing. + */ + public MailServiceBean(Session session, SettingsServiceBean settingsService) { + this.session = session; + this.settingsService = settingsService; + } - @Resource(name = "mail/notifyMailSession") + @Inject + @Named("mail/systemSession") private Session session; public boolean sendSystemEmail(String to, String subject, String messageText) { return sendSystemEmail(to, subject, messageText, false); } - + + /** + * Send a system notification to one or multiple recipients by email. 
+ * Will skip sending when {@link #getSystemAddress()} doesn't return a configured "from" address. + * @param to A comma separated list of one or multiple recipients' addresses. May contain a "personal name" and + * the recipients address in <>. See also {@link InternetAddress}. + * @param subject The message's subject + * @param messageText The message's text + * @param isHtmlContent Determine if the message text is formatted using HTML or plain text. + * @return Status: true if sent successfully, false otherwise + */ public boolean sendSystemEmail(String to, String subject, String messageText, boolean isHtmlContent) { + Optional optionalAddress = getSystemAddress(); + if (optionalAddress.isEmpty()) { + logger.fine(() -> "Skipping sending mail to " + to + ", because no system address has been set."); + return false; + } + InternetAddress systemAddress = optionalAddress.get(); - boolean sent = false; - InternetAddress systemAddress = getSystemAddress(); - - String body = messageText - + (isHtmlContent ? BundleUtil.getStringFromBundle("notification.email.closing.html", Arrays.asList(BrandingUtil.getSupportTeamEmailAddress(systemAddress), BrandingUtil.getSupportTeamName(systemAddress))) - : BundleUtil.getStringFromBundle("notification.email.closing", Arrays.asList(BrandingUtil.getSupportTeamEmailAddress(systemAddress), BrandingUtil.getSupportTeamName(systemAddress)))); + String body = messageText + + BundleUtil.getStringFromBundle(isHtmlContent ? "notification.email.closing.html" : "notification.email.closing", + List.of(BrandingUtil.getSupportTeamEmailAddress(systemAddress), BrandingUtil.getSupportTeamName(systemAddress))); - logger.fine("Sending email to " + to + ". Subject: <<<" + subject + ">>>. Body: " + body); + logger.fine(() -> "Sending email to %s. Subject: <<<%s>>>. Body: %s".formatted(to, subject, body)); try { + // Since JavaMail 1.6, we have support for UTF-8 mail addresses and do not need to handle these ourselves. 
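+ // InternetAddress.parse() splits the comma-separated "to" value into individual addresses and validates each of them, throwing AddressException (a subtype of MessagingException) on malformed input, which the catch block below handles.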
+ InternetAddress[] recipients = InternetAddress.parse(to); + MimeMessage msg = new MimeMessage(session); - if (systemAddress != null) { - msg.setFrom(systemAddress); - msg.setSentDate(new Date()); - String[] recipientStrings = to.split(","); - InternetAddress[] recipients = new InternetAddress[recipientStrings.length]; - for (int i = 0; i < recipients.length; i++) { - try { - recipients[i] = new InternetAddress(recipientStrings[i], "", charset); - } catch (UnsupportedEncodingException ex) { - logger.severe(ex.getMessage()); - } - } - msg.setRecipients(Message.RecipientType.TO, recipients); - msg.setSubject(subject, charset); - if (isHtmlContent) { - msg.setText(body, charset, "html"); - } else { - msg.setText(body, charset); - } - - try { - Transport.send(msg, recipients); - sent = true; - } catch (MessagingException ssfe) { - logger.warning("Failed to send mail to: " + to); - logger.warning("MessagingException Message: " + ssfe); - } + msg.setFrom(systemAddress); + msg.setSentDate(new Date()); + msg.setRecipients(Message.RecipientType.TO, recipients); + msg.setSubject(subject, charset); + if (isHtmlContent) { + msg.setText(body, charset, "html"); } else { - logger.fine("Skipping sending mail to " + to + ", because the \"no-reply\" address not set (" + Key.SystemEmail + " setting)."); + msg.setText(body, charset); } - } catch (AddressException ae) { - logger.warning("Failed to send mail to " + to); - ae.printStackTrace(System.out); - } catch (MessagingException me) { - logger.warning("Failed to send mail to " + to); - me.printStackTrace(System.out); + + Transport.send(msg, recipients); + return true; + } catch (MessagingException ae) { + logger.log(Level.WARNING, "Failed to send mail to %s: %s".formatted(to, ae.getMessage()), ae); + logger.info("If there are UTF-8 characters in the recipient addresses, make sure the MTA supports them and JVM option " + JvmSettings.MAIL_MTA_SUPPORT_UTF8.getScopedKey() + "=true"); } - return sent; + return false; } - - public InternetAddress getSystemAddress() { - String systemEmail = settingsService.getValueForKey(Key.SystemEmail); - return MailUtil.parseSystemAddress(systemEmail); + + /** + * Lookup the system mail address ({@code InternetAddress} may contain personal and actual address). + * @return The system mail address or an empty {@code Optional} if not configured. + */ + public Optional<InternetAddress> getSystemAddress() { + boolean providedByDB = false; + String mailAddress = JvmSettings.SYSTEM_EMAIL.lookupOptional().orElse(null); + + // Try lookup of (deprecated) database setting only if not configured via MPCONFIG + if (mailAddress == null) { + mailAddress = settingsService.getValueForKey(Key.SystemEmail); + // Encourage people to migrate from deprecated setting + if (mailAddress != null) { + providedByDB = true; + logger.warning("The :SystemEmail DB setting has been deprecated, please reconfigure using JVM option " + JvmSettings.SYSTEM_EMAIL.getScopedKey()); + } + } + + try { + // Parse and return. + return Optional.of(new InternetAddress(Objects.requireNonNull(mailAddress), true)); + } catch (AddressException e) { + logger.log(Level.WARNING, "Could not parse system mail address '%s' provided by %s: " + .formatted(mailAddress, providedByDB ? 
"DB setting" : "JVM option"), e); + } catch (NullPointerException e) { + // Do not pester the logs - no configuration may mean someone wants to disable mail notifications + logger.fine("Could not find a system mail setting in database (key :SystemEmail, deprecated) or JVM option '" + JvmSettings.SYSTEM_EMAIL.getScopedKey() + "'"); + } + // We define the system email address as an optional setting, in case people do not want to enable mail + // notifications (like in a development context, but might be useful elsewhere, too). + return Optional.empty(); + } + + /** + * Lookup the support team mail address ({@code InternetAddress} may contain personal and actual address). + * Will default to return {@code #getSystemAddress} if not configured. + * @return Support team mail address + */ + public Optional<InternetAddress> getSupportAddress() { + Optional<String> supportMailAddress = JvmSettings.SUPPORT_EMAIL.lookupOptional(); + if (supportMailAddress.isPresent()) { + try { + return Optional.of(new InternetAddress(supportMailAddress.get(), true)); + } catch (AddressException e) { + logger.log(Level.WARNING, "Could not parse support mail address '%s', defaulting to system address: ".formatted(supportMailAddress.get()), e); + } + } + return getSystemAddress(); } //@Resource(name="mail/notifyMailSession") public void sendMail(String reply, String to, String cc, String subject, String messageText) { + Optional<InternetAddress> optionalAddress = getSystemAddress(); + if (optionalAddress.isEmpty()) { + logger.fine(() -> "Skipping sending mail to " + to + ", because no system address has been set."); + return; + } + // Always send from system address to avoid email being blocked + InternetAddress fromAddress = optionalAddress.get(); + try { MimeMessage msg = new MimeMessage(session); - // Always send from system address to avoid email being blocked - InternetAddress fromAddress = getSystemAddress(); + try { setContactDelegation(reply, fromAddress); } catch (UnsupportedEncodingException ex) { @@ -395,7 +456,7 @@ public String getMessageTextBasedOnNotification(UserNotificatio GuestbookResponse gbr = far.getGuestbookResponse(); if (gbr != null) { messageText += MessageFormat.format( - BundleUtil.getStringFromBundle("notification.email.requestFileAccess.guestbookResponse"), gbr.toHtmlFormattedResponse()); + BundleUtil.getStringFromBundle("notification.email.requestFileAccess.guestbookResponse"), gbr.toHtmlFormattedResponse(requestor)); } return messageText; case GRANTFILEACCESS: @@ -466,18 +527,24 @@ public String getMessageTextBasedOnNotificatio case RETURNEDDS: version = (DatasetVersion) targetObject; pattern = BundleUtil.getStringFromBundle("notification.email.wasReturnedByReviewer"); - String optionalReturnReason = ""; - /* - FIXME - Setting up to add single comment when design completed - optionalReturnReason = "."; - if (comment != null && !comment.isEmpty()) { - optionalReturnReason = ".\n\n" + BundleUtil.getStringFromBundle("wasReturnedReason") + "\n\n" + comment; - } - */ + String[] paramArrayReturnedDataset = {version.getDataset().getDisplayName(), getDatasetDraftLink(version.getDataset()), - version.getDataset().getOwner().getDisplayName(), getDataverseLink(version.getDataset().getOwner()), optionalReturnReason}; + version.getDataset().getOwner().getDisplayName(), getDataverseLink(version.getDataset().getOwner())}; messageText += MessageFormat.format(pattern, paramArrayReturnedDataset); + + if (comment != null && !comment.isEmpty()) { + messageText += "\n\n" + 
MessageFormat.format(BundleUtil.getStringFromBundle("notification.email.wasReturnedByReviewerReason"), comment); + } + + Dataverse d = (Dataverse) version.getDataset().getOwner(); + List contactEmailList = new ArrayList(); + for (DataverseContact dc : d.getDataverseContacts()) { + contactEmailList.add(dc.getContactEmail()); + } + if (!contactEmailList.isEmpty()) { + String contactEmails = String.join(", ", contactEmailList); + messageText += "\n\n" + MessageFormat.format(BundleUtil.getStringFromBundle("notification.email.wasReturnedByReviewer.collectionContacts"), contactEmails); + } return messageText; case WORKFLOW_SUCCESS: @@ -505,13 +572,12 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio messageText += MessageFormat.format(pattern, paramArrayStatus); return messageText; case CREATEACC: - InternetAddress systemAddress = getSystemAddress(); String accountCreatedMessage = BundleUtil.getStringFromBundle("notification.email.welcome", Arrays.asList( BrandingUtil.getInstallationBrandName(), systemConfig.getGuidesBaseUrl(), systemConfig.getGuidesVersion(), - BrandingUtil.getSupportTeamName(systemAddress), - BrandingUtil.getSupportTeamEmailAddress(systemAddress) + BrandingUtil.getSupportTeamName(getSystemAddress().orElse(null)), + BrandingUtil.getSupportTeamEmailAddress(getSystemAddress().orElse(null)) )); String optionalConfirmEmailAddon = confirmEmailService.optionalConfirmEmailAddonMsg(userNotification.getUser()); accountCreatedMessage += optionalConfirmEmailAddon; diff --git a/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java b/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java index ca2f6145cba..1ead0b13cdc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ManageFilePermissionsPage.java @@ -422,7 +422,10 @@ public void grantAccess(ActionEvent evt) { // set request(s) granted, if they exist for (AuthenticatedUser au : roleAssigneeService.getExplicitUsers(roleAssignee)) { FileAccessRequest far = file.getAccessRequestForAssignee(au); - far.setStateGranted(); + //There may not be a request, so do the null check + if (far != null) { + far.setStateGranted(); + } } datafileService.save(file); } diff --git a/src/main/java/edu/harvard/iq/dataverse/ManageGuestbooksPage.java b/src/main/java/edu/harvard/iq/dataverse/ManageGuestbooksPage.java index cc89cfd9d56..d1cc515fd01 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ManageGuestbooksPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ManageGuestbooksPage.java @@ -5,6 +5,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseGuestbookRootCommand; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.JsfHelper; import static edu.harvard.iq.dataverse.util.JsfHelper.JH; import java.util.LinkedList; @@ -220,7 +221,8 @@ private String getFileName(){ // The fix below replaces any spaces in the name of the dataverse with underscores; // without it, the filename was chopped off (by the browser??), and the user // was getting the file name "Foo", instead of "Foo and Bar in Social Sciences.csv". -- L.A. 
-        return dataverse.getName().replace(' ', '_') + "_GuestbookReponses.csv";
+        // Also removing some chars that have been reported to cause issues with certain browsers
+        return FileUtil.sanitizeFileName(dataverse.getName() + "_GuestbookResponses.csv");
    }

    public void deleteGuestbook() {
diff --git a/src/main/java/edu/harvard/iq/dataverse/MetadataBlockServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MetadataBlockServiceBean.java
index bb6daa264ba..1e2a34f5472 100644
--- a/src/main/java/edu/harvard/iq/dataverse/MetadataBlockServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/MetadataBlockServiceBean.java
@@ -1,43 +1,82 @@
package edu.harvard.iq.dataverse;

-import java.util.List;
import jakarta.ejb.Stateless;
import jakarta.inject.Named;
import jakarta.persistence.EntityManager;
import jakarta.persistence.NoResultException;
import jakarta.persistence.PersistenceContext;
+import jakarta.persistence.TypedQuery;
+import jakarta.persistence.criteria.*;
+
+import java.util.List;

/**
- *
 * @author michael
 */
@Stateless
@Named
public class MetadataBlockServiceBean {
-    
    @PersistenceContext(unitName = "VDCNet-ejbPU")
    private EntityManager em;
-    
    public MetadataBlock save(MetadataBlock mdb) {
-       return em.merge(mdb);
-    }
-    
-    
+        return em.merge(mdb);
+    }

    public List<MetadataBlock> listMetadataBlocks() {
+        return listMetadataBlocks(false);
+    }
+
+    public List<MetadataBlock> listMetadataBlocks(boolean onlyDisplayedOnCreate) {
+        if (onlyDisplayedOnCreate) {
+            return listMetadataBlocksDisplayedOnCreate(null);
+        }
        return em.createNamedQuery("MetadataBlock.listAll", MetadataBlock.class).getResultList();
    }
-    
-    public MetadataBlock findById( Long id ) {
+
+    public MetadataBlock findById(Long id) {
        return em.find(MetadataBlock.class, id);
    }
-    
-    public MetadataBlock findByName( String name ) {
+
+    public MetadataBlock findByName(String name) {
        try {
            return em.createNamedQuery("MetadataBlock.findByName", MetadataBlock.class)
-                    .setParameter("name", name)
-                    .getSingleResult();
-        } catch ( NoResultException nre ) {
+                .setParameter("name", name)
+                .getSingleResult();
+        } catch (NoResultException nre) {
            return null;
        }
    }
+
+    public List<MetadataBlock> listMetadataBlocksDisplayedOnCreate(Dataverse ownerDataverse) {
+        CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder();
+        CriteriaQuery<MetadataBlock> criteriaQuery = criteriaBuilder.createQuery(MetadataBlock.class);
+        Root<MetadataBlock> metadataBlockRoot = criteriaQuery.from(MetadataBlock.class);
+        Join<MetadataBlock, DatasetFieldType> datasetFieldTypeJoin = metadataBlockRoot.join("datasetFieldTypes");
+        Predicate displayOnCreatePredicate = criteriaBuilder.isTrue(datasetFieldTypeJoin.get("displayOnCreate"));
+
+        if (ownerDataverse != null) {
+            Root<Dataverse> dataverseRoot = criteriaQuery.from(Dataverse.class);
+            Join<Dataverse, DataverseFieldTypeInputLevel> datasetFieldTypeInputLevelJoin = dataverseRoot.join("dataverseFieldTypeInputLevels", JoinType.LEFT);
+
+            Predicate requiredPredicate = criteriaBuilder.and(
+                    datasetFieldTypeInputLevelJoin.get("datasetFieldType").in(metadataBlockRoot.get("datasetFieldTypes")),
+                    criteriaBuilder.isTrue(datasetFieldTypeInputLevelJoin.get("required")));
+
+            Predicate unionPredicate = criteriaBuilder.or(displayOnCreatePredicate, requiredPredicate);
+
+            criteriaQuery.where(criteriaBuilder.and(
+                    criteriaBuilder.equal(dataverseRoot.get("id"), ownerDataverse.getId()),
+                    metadataBlockRoot.in(dataverseRoot.get("metadataBlocks")),
+                    unionPredicate
+            ));
+        } else {
+            criteriaQuery.where(displayOnCreatePredicate);
+        }
+
+        criteriaQuery.select(metadataBlockRoot).distinct(true);
+        TypedQuery<MetadataBlock> typedQuery = em.createQuery(criteriaQuery);
+        return typedQuery.getResultList();
+    }
}
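
The new listMetadataBlocksDisplayedOnCreate() above drives which metadata blocks show up on the create-dataset form: a block qualifies when one of its fields is flagged displayOnCreate or, within the owning collection, an input level marks one of its fields as required. A minimal caller sketch (illustrative only; it assumes an injected MetadataBlockServiceBean named metadataBlockSvc and a Dataverse already in scope):

    // Blocks to render on the create form for a specific collection:
    List<MetadataBlock> forCollection = metadataBlockSvc.listMetadataBlocksDisplayedOnCreate(ownerDataverse);
    // Without a collection context, only the global displayOnCreate flag applies:
    List<MetadataBlock> global = metadataBlockSvc.listMetadataBlocks(true);
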
diff --git a/src/main/java/edu/harvard/iq/dataverse/NavigationWrapper.java b/src/main/java/edu/harvard/iq/dataverse/NavigationWrapper.java index 832d7ec19ef..54fb8f211a6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/NavigationWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/NavigationWrapper.java @@ -16,6 +16,7 @@ import java.util.logging.Logger; import jakarta.faces.context.FacesContext; import jakarta.faces.view.ViewScoped; +import jakarta.ws.rs.core.Response.Status; import jakarta.inject.Inject; import jakarta.inject.Named; import jakarta.servlet.http.HttpServletRequest; @@ -87,6 +88,10 @@ public String notAuthorized(){ } } + public String tooManyRequests() { + return sendError(Status.TOO_MANY_REQUESTS.getStatusCode()); + } + public String notFound() { return sendError(HttpServletResponse.SC_NOT_FOUND); } diff --git a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java index a1de33a764e..a389cbc735b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/PermissionServiceBean.java @@ -41,6 +41,9 @@ import java.util.stream.Collectors; import static java.util.stream.Collectors.toList; import jakarta.persistence.Query; +import jakarta.persistence.criteria.CriteriaBuilder; +import jakarta.persistence.criteria.CriteriaQuery; +import jakarta.persistence.criteria.Root; /** * Your one-stop-shop for deciding which user can do what action on which @@ -94,6 +97,9 @@ public class PermissionServiceBean { @Inject DataverseRequestServiceBean dvRequestService; + @Inject + DatasetVersionFilesServiceBean datasetVersionFilesServiceBean; + /** * A request-level permission query (e.g includes IP ras). */ @@ -439,22 +445,14 @@ private Set getInferredPermissions(DvObject dvo) { * download permission for everybody: */ private boolean isPublicallyDownloadable(DvObject dvo) { - if (dvo instanceof DataFile) { + if (dvo instanceof DataFile df) { // unrestricted files that are part of a release dataset // automatically get download permission for everybody: // -- L.A. 4.0 beta12 - - DataFile df = (DataFile) dvo; - if (!df.isRestricted()) { - if (df.getOwner().getReleasedVersion() != null) { - if (df.getOwner().getReleasedVersion().getFileMetadatas() != null) { - for (FileMetadata fm : df.getOwner().getReleasedVersion().getFileMetadatas()) { - if (df.equals(fm.getDataFile())) { - return true; - } - } - } + DatasetVersion releasedVersion = df.getOwner().getReleasedVersion(); + if (releasedVersion != null) { + return datasetVersionFilesServiceBean.isDataFilePresentInDatasetVersion(releasedVersion, df); } } } @@ -837,4 +835,57 @@ public boolean isMatchingWorkflowLock(Dataset d, String userId, String invocatio return false; } + /** + * Checks if a DataverseRequest can download at least one file of the target DatasetVersion. 
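+     * Released, unrestricted files satisfy the check immediately; otherwise the DownloadFile permission is
+     * evaluated per file for the request's user and groups.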
+ * + * @param dataverseRequest DataverseRequest to check + * @param datasetVersion DatasetVersion to check + * @return boolean indicating whether the user can download at least one file or not + */ + public boolean canDownloadAtLeastOneFile(DataverseRequest dataverseRequest, DatasetVersion datasetVersion) { + if (hasUnrestrictedReleasedFiles(datasetVersion)) { + return true; + } + List fileMetadatas = datasetVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : fileMetadatas) { + DataFile dataFile = fileMetadata.getDataFile(); + Set roleAssignees = new HashSet<>(groupService.groupsFor(dataverseRequest, dataFile)); + roleAssignees.add(dataverseRequest.getUser()); + if (hasGroupPermissionsFor(roleAssignees, dataFile, EnumSet.of(Permission.DownloadFile))) { + return true; + } + } + return false; + } + + /** + * Checks if a DatasetVersion has unrestricted released files. + * + * This method is mostly based on {@link #isPublicallyDownloadable(DvObject)} although in this case, instead of basing + * the search on a particular file, it searches for the total number of files in the target version that are present + * in the released version. + * + * @param targetDatasetVersion DatasetVersion to check + * @return boolean indicating whether the dataset version has released files or not + */ + private boolean hasUnrestrictedReleasedFiles(DatasetVersion targetDatasetVersion) { + Dataset targetDataset = targetDatasetVersion.getDataset(); + if (!targetDataset.isReleased()) { + return false; + } + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); + CriteriaQuery criteriaQuery = criteriaBuilder.createQuery(Long.class); + Root datasetVersionRoot = criteriaQuery.from(DatasetVersion.class); + Root fileMetadataRoot = criteriaQuery.from(FileMetadata.class); + criteriaQuery + .select(criteriaBuilder.count(fileMetadataRoot)) + .where(criteriaBuilder.and( + criteriaBuilder.equal(fileMetadataRoot.get("dataFile").get("restricted"), false), + criteriaBuilder.equal(datasetVersionRoot.get("dataset"), targetDataset), + criteriaBuilder.equal(datasetVersionRoot.get("versionState"), DatasetVersion.VersionState.RELEASED), + fileMetadataRoot.in(targetDatasetVersion.getFileMetadatas()), + fileMetadataRoot.in(datasetVersionRoot.get("fileMetadatas")))); + Long result = em.createQuery(criteriaQuery).getSingleResult(); + return result > 0; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/Retention.java b/src/main/java/edu/harvard/iq/dataverse/Retention.java new file mode 100644 index 00000000000..e1bd2231570 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/Retention.java @@ -0,0 +1,102 @@ +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.util.BundleUtil; +import jakarta.persistence.*; + +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.List; +import java.util.Objects; + +@NamedQueries({ + @NamedQuery( name="Retention.findAll", + query = "SELECT r FROM Retention r"), + @NamedQuery( name="Retention.findById", + query = "SELECT r FROM Retention r WHERE r.id=:id"), + @NamedQuery( name="Retention.findByDateUnavailable", + query = "SELECT r FROM Retention r WHERE r.dateUnavailable=:dateUnavailable"), + @NamedQuery( name="Retention.deleteById", + query = "DELETE FROM Retention r WHERE r.id=:id") +}) +@Entity +public class Retention { + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + @Column(nullable = false) + private LocalDate dateUnavailable; + + @Column(columnDefinition="TEXT") + private String reason; + 
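+    // Files governed by this retention period; PERSIST and REMOVE operations on the retention cascade to them.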
+ @OneToMany(mappedBy="retention", cascade={ CascadeType.REMOVE, CascadeType.PERSIST}) + private List dataFiles; + + public Retention(){ + dateUnavailable = LocalDate.now().plusYears(1000); // Most likely valid with respect to configuration + } + + public Retention(LocalDate dateUnavailable, String reason) { + this.dateUnavailable = dateUnavailable; + this.reason = reason; + } + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public LocalDate getDateUnavailable() { + return dateUnavailable; + } + + public void setDateUnavailable(LocalDate dateUnavailable) { + this.dateUnavailable = dateUnavailable; + } + + public String getFormattedDateUnavailable() { + return getDateUnavailable().format(DateTimeFormatter.ISO_LOCAL_DATE.withLocale(BundleUtil.getCurrentLocale())); + } + + public String getReason() { + return reason; + } + + public void setReason(String reason) { + this.reason = reason; + } + + public List getDataFiles() { + return dataFiles; + } + + public void setDataFiles(List dataFiles) { + this.dataFiles = dataFiles; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Retention retention = (Retention) o; + return id.equals(retention.id) && dateUnavailable.equals(retention.dateUnavailable) && Objects.equals(reason, retention.reason); + } + + @Override + public int hashCode() { + return Objects.hash(id, dateUnavailable, reason); + } + + @Override + public String toString() { + return "Retention{" + + "id=" + id + + ", dateUnavailable=" + dateUnavailable + + ", reason='" + reason + '\'' + + '}'; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/RetentionServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/RetentionServiceBean.java new file mode 100644 index 00000000000..1421ac61120 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/RetentionServiceBean.java @@ -0,0 +1,66 @@ +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; +import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; +import jakarta.ejb.EJB; +import jakarta.ejb.Stateless; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; + +import java.util.List; + + +@Stateless +@Named +public class RetentionServiceBean { + + @PersistenceContext + EntityManager em; + + @EJB + ActionLogServiceBean actionLogSvc; + + public List findAllRetentions() { + return em.createNamedQuery("Retention.findAll", Retention.class).getResultList(); + } + + public Retention findByRetentionId(Long id) { + Query query = em.createNamedQuery("Retention.findById", Retention.class); + query.setParameter("id", id); + try { + return (Retention) query.getSingleResult(); + } catch (Exception ex) { + return null; + } + } + + public Retention merge(Retention r) { + return em.merge(r); + } + + public Long save(Retention retention, String userIdentifier) { + if (retention.getId() == null) { + em.persist(retention); + em.flush(); + } + //Not quite from a command, but this action can be done by anyone, so command seems better than Admin or other alternatives + actionLogSvc.log(new ActionLogRecord(ActionLogRecord.ActionType.Command, "retentionCreate") + .setInfo("id: " + retention.getId() + " date unavailable: " + retention.getDateUnavailable() + " reason: " + retention.getReason()).setUserIdentifier(userIdentifier)); + return retention.getId(); + } 
+ + private int deleteById(long id, String userIdentifier) { + //Not quite from a command, but this action can be done by anyone, so command seems better than Admin or other alternatives + actionLogSvc.log(new ActionLogRecord(ActionLogRecord.ActionType.Command, "retentionDelete") + .setInfo(Long.toString(id)) + .setUserIdentifier(userIdentifier)); + return em.createNamedQuery("Retention.deleteById") + .setParameter("id", id) + .executeUpdate(); + } + public int delete(Retention retention, String userIdentifier) { + return deleteById(retention.getId(), userIdentifier); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java index 059d5a8ffd3..88acc1916cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/RoleAssigneeServiceBean.java @@ -11,6 +11,7 @@ import edu.harvard.iq.dataverse.authorization.groups.impl.explicit.ExplicitGroupServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.GuestUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.mydata.MyDataFilterParams; import edu.harvard.iq.dataverse.privateurl.PrivateUrlUtil; @@ -96,18 +97,18 @@ public RoleAssignee getRoleAssignee(String identifier, Boolean augmented) { if (identifier == null || identifier.isEmpty()) { throw new IllegalArgumentException("Identifier cannot be null or empty string."); } - switch (identifier.charAt(0)) { - case ':': + switch (identifier.substring(0,1)) { + case ":": return predefinedRoleAssignees.get(identifier); - case '@': + case AuthenticatedUser.IDENTIFIER_PREFIX: if (!augmented){ return authSvc.getAuthenticatedUser(identifier.substring(1)); } else { return authSvc.getAuthenticatedUserWithProvider(identifier.substring(1)); - } - case '&': + } + case Group.IDENTIFIER_PREFIX: return groupSvc.getGroup(identifier.substring(1)); - case '#': + case PrivateUrlUser.PREFIX: return PrivateUrlUtil.identifier2roleAssignee(identifier); default: throw new IllegalArgumentException("Unsupported assignee identifier '" + identifier + "'"); diff --git a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java index 71318a0184a..a387b27d98b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/S3PackageImporter.java @@ -17,6 +17,7 @@ import com.amazonaws.services.s3.model.S3Object; import com.amazonaws.services.s3.model.S3ObjectSummary; import edu.harvard.iq.dataverse.api.AbstractApiBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import java.io.BufferedReader; @@ -203,35 +204,21 @@ public DataFile createPackageDataFile(Dataset dataset, String folderName, long t fmd.setDatasetVersion(dataset.getLatestVersion()); FileUtil.generateS3PackageStorageIdentifier(packageFile); - - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(packageFile.getProtocol(), commandEngine.getContext()); + PidProvider pidProvider = commandEngine.getContext().dvObjects().getEffectivePidGenerator(dataset); if (packageFile.getIdentifier() == null || packageFile.getIdentifier().isEmpty()) { - String packageIdentifier = 
idServiceBean.generateDataFileIdentifier(packageFile);
-            packageFile.setIdentifier(packageIdentifier);
-        }
-
-        String nonNullDefaultIfKeyNotFound = "";
-        String protocol = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound);
-        String authority = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound);
-
-        if (packageFile.getProtocol() == null) {
-            packageFile.setProtocol(protocol);
-        }
-        if (packageFile.getAuthority() == null) {
-            packageFile.setAuthority(authority);
+            pidProvider.generatePid(packageFile);
        }

        if (!packageFile.isIdentifierRegistered()) {
            String doiRetString = "";
-            idServiceBean = GlobalIdServiceBean.getBean(commandEngine.getContext());
            try {
-                doiRetString = idServiceBean.createIdentifier(packageFile);
+                doiRetString = pidProvider.createIdentifier(packageFile);
            } catch (Throwable e) {
            }

            // Check return value to make sure registration succeeded
-            if (!idServiceBean.registerWhenPublished() && doiRetString.contains(packageFile.getIdentifier())) {
+            if (!pidProvider.registerWhenPublished() && doiRetString.contains(packageFile.getIdentifier())) {
                packageFile.setIdentifierRegistered(true);
                packageFile.setGlobalIdCreateTime(new Date());
            }
diff --git a/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java b/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java
index 6be768321c4..46941c8b5b6 100644
--- a/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java
+++ b/src/main/java/edu/harvard/iq/dataverse/SendFeedbackDialog.java
@@ -6,7 +6,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.BundleUtil;
-import edu.harvard.iq.dataverse.util.MailUtil;
+import edu.harvard.iq.dataverse.util.JsfHelper;
import edu.harvard.iq.dataverse.util.SystemConfig;
import java.util.Optional;
import java.util.Random;
@@ -101,8 +101,7 @@ public void initUserInput(ActionEvent ae) {
        op1 = Long.valueOf(random.nextInt(10));
        op2 = Long.valueOf(random.nextInt(10));
        userSum = null;
-        String supportEmail = JvmSettings.SUPPORT_EMAIL.lookupOptional().orElse(settingsService.getValueForKey(SettingsServiceBean.Key.SystemEmail));
-        systemAddress = MailUtil.parseSystemAddress(supportEmail);
+        systemAddress = mailService.getSupportAddress().orElse(null);
    }

    public Long getOp1() {
@@ -130,6 +129,10 @@ public void setUserSum(Long userSum) {
    }

    public String getMessageTo() {
+        if (op1 == null || op2 == null) {
+            // Fix for the 403 error page: initUserInput() has not been called yet at this point
+            initUserInput(null);
+        }
        if (feedbackTarget == null) {
            return BrandingUtil.getSupportTeamName(systemAddress);
        } else if (feedbackTarget.isInstanceofDataverse()) {
@@ -217,6 +220,7 @@ public String sendMessage() {
        }
        logger.fine("sending feedback: " + feedback);
        mailService.sendMail(feedback.getFromEmail(), feedback.getToEmail(), feedback.getCcEmail(), feedback.getSubject(), feedback.getBody());
+        JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("contact.sent"));
        return null;
    }
diff --git a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java
index 0a1d0effc03..48196591b19 100644
--- a/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java
+++ b/src/main/java/edu/harvard/iq/dataverse/SettingsWrapper.java
@@ -6,6 +6,9 @@ package edu.harvard.iq.dataverse;

import edu.harvard.iq.dataverse.branding.BrandingUtil;
+import 
edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.Setting; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -63,6 +66,9 @@ public class SettingsWrapper implements java.io.Serializable { @EJB MetadataBlockServiceBean mdbService; + + @EJB + MailServiceBean mailServiceBean; private Map settingsMap; @@ -72,6 +78,9 @@ public class SettingsWrapper implements java.io.Serializable { private boolean embargoDateChecked = false; private LocalDate maxEmbargoDate = null; + private boolean retentionDateChecked = false; + private LocalDate minRetentionDate = null; + private String siteUrl = null; private Dataverse rootDataverse = null; @@ -296,14 +305,16 @@ public boolean isPublicInstall(){ } return publicInstall; } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncUpload() { if (rsyncUpload == null) { rsyncUpload = getUploadMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString()); } return rsyncUpload; } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncDownload() { if (rsyncDownload == null) { rsyncDownload = systemConfig.isRsyncDownload(); @@ -333,12 +344,29 @@ public boolean isGlobusFileDownload() { } public boolean isGlobusEnabledStorageDriver(String driverId) { - if (globusStoreList == null) { - globusStoreList = systemConfig.getGlobusStoresList(); + return (GlobusAccessibleStore.acceptsGlobusTransfers(driverId) || GlobusAccessibleStore.allowsGlobusReferences(driverId)); + } + + public boolean isDownloadable(FileMetadata fmd) { + boolean downloadable=true; + if(isGlobusFileDownload()) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + + downloadable = downloadable && StorageIO.isDataverseAccessible(driverId); } - return globusStoreList.contains(driverId); + return downloadable; } + public boolean isGlobusTransferable(FileMetadata fmd) { + boolean globusTransferable=true; + if(isGlobusFileDownload()) { + String driverId = DataAccess.getStorageDriverFromIdentifier(fmd.getDataFile().getStorageIdentifier()); + globusTransferable = GlobusAccessibleStore.isGlobusAccessible(driverId); + } + return globusTransferable; + } + + public String getGlobusAppUrl() { if (globusAppUrl == null) { globusAppUrl = settingsService.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost"); @@ -353,7 +381,8 @@ public boolean isWebloaderUpload() { } return webloaderUpload; } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncOnly() { if (rsyncOnly == null) { String downloadMethods = getValueForKey(SettingsServiceBean.Key.DownloadMethods); @@ -372,7 +401,7 @@ public boolean isRsyncOnly() { } return rsyncOnly; } - + public boolean isHTTPUpload(){ if (httpUpload == null) { httpUpload = getUploadMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString()); @@ -380,22 +409,15 @@ public boolean isHTTPUpload(){ return httpUpload; } - public boolean isDataFilePIDSequentialDependent(){ - if (dataFilePIDSequentialDependent == null) { - dataFilePIDSequentialDependent = systemConfig.isDataFilePIDSequentialDependent(); - } - return dataFilePIDSequentialDependent; - } - public String getSupportTeamName() { - String systemEmail = getValueForKey(SettingsServiceBean.Key.SystemEmail); - InternetAddress systemAddress = 
MailUtil.parseSystemAddress(systemEmail);
+        // TODO: should this be replaced with mailServiceBean.getSupportAddress() to expose a configured support team?
+        InternetAddress systemAddress = mailServiceBean.getSystemAddress().orElse(null);
        return BrandingUtil.getSupportTeamName(systemAddress);
    }

    public String getSupportTeamEmail() {
-        String systemEmail = getValueForKey(SettingsServiceBean.Key.SystemEmail);
-        InternetAddress systemAddress = MailUtil.parseSystemAddress(systemEmail);
+        // TODO: should this be replaced with mailServiceBean.getSupportAddress() to expose a configured support team?
+        InternetAddress systemAddress = mailServiceBean.getSystemAddress().orElse(null);
        return BrandingUtil.getSupportTeamEmailAddress(systemAddress) != null ? BrandingUtil.getSupportTeamEmailAddress(systemAddress) : BrandingUtil.getSupportTeamName(systemAddress);
    }
@@ -450,23 +472,6 @@ public Map<String, String> getConfiguredLocales() {
        return configuredLocales;
    }

-    public boolean isDoiInstallation() {
-        String protocol = getValueForKey(SettingsServiceBean.Key.Protocol);
-        if ("doi".equals(protocol)) {
-            return true;
-        } else {
-            return false;
-        }
-    }
-
-    public boolean isDataCiteInstallation() {
-        String protocol = getValueForKey(SettingsServiceBean.Key.DoiProvider);
-        if ("DataCite".equals(protocol)) {
-            return true;
-        } else {
-            return false;
-        }
-    }

    public boolean isMakeDataCountDisplayEnabled() {
        boolean safeDefaultIfKeyNotFound = (getValueForKey(SettingsServiceBean.Key.MDCLogPath)!=null); //Backward compatible
@@ -583,6 +588,89 @@ public void validateEmbargoDate(FacesContext context, UIComponent component, Obj
        }
    }

+    public LocalDate getMinRetentionDate() {
+        if (!retentionDateChecked) {
+            String months = getValueForKey(Key.MinRetentionDurationInMonths);
+            Long minMonths = null;
+            if (months != null) {
+                try {
+                    minMonths = Long.parseLong(months);
+                } catch (NumberFormatException nfe) {
+                    logger.warning("Can't interpret :MinRetentionDurationInMonths as a long");
+                }
+            }
+
+            if (minMonths != null && minMonths != 0) {
+                if (minMonths == -1) {
+                    minMonths = 0L; // Absolute minimum is 0
+                }
+                minRetentionDate = LocalDate.now().plusMonths(minMonths);
+            }
+            retentionDateChecked = true;
+        }
+        return minRetentionDate;
+    }
+
+    public LocalDate getMaxRetentionDate() {
+        Long maxMonths = 12000L; // Arbitrary cutoff at 1000 years - needs to keep maxDate < year 999999999, and
+                                 // somewhere between 1K and 10K years the datepicker widget stops showing a popup
+                                 // calendar
+        return LocalDate.now().plusMonths(maxMonths);
+    }
+
+    public boolean isValidRetentionDate(Retention r) {
+
+        if (r.getDateUnavailable() == null ||
+                isRetentionAllowed() && r.getDateUnavailable().isAfter(getMinRetentionDate())) {
+            return true;
+        }
+
+        return false;
+    }
+
+    public boolean isRetentionAllowed() {
+        // Need a valid :MinRetentionDurationInMonths setting to allow retentions
+        return getMinRetentionDate() != null;
+    }
+
+    public void validateRetentionDate(FacesContext context, UIComponent component, Object value)
+            throws ValidatorException {
+        if (isRetentionAllowed()) {
+            UIComponent cb = component.findComponent("retentionCheckbox");
+            UIInput endComponent = (UIInput) cb;
+            boolean removedState = false;
+            if (endComponent != null) {
+                try {
+                    removedState = (Boolean) endComponent.getSubmittedValue();
+                } catch (NullPointerException npe) {
+                    // Do nothing - checkbox is not being shown (and is therefore not checked)
+                }
+            }
+            if (!removedState && value == null) {
+                String msgString = BundleUtil.getStringFromBundle("retention.date.required");
+                FacesMessage msg = new FacesMessage(msgString);
+                msg.setSeverity(FacesMessage.SEVERITY_ERROR);
+                throw new ValidatorException(msg);
+            }
+            Retention newR = new Retention(((LocalDate) value), null);
+            if (!isValidRetentionDate(newR)) {
+                String minDate = getMinRetentionDate().format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
+                String maxDate = getMaxRetentionDate().format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
+                String msgString = BundleUtil.getStringFromBundle("retention.date.invalid",
+                        Arrays.asList(minDate, maxDate));
+                // If we don't throw an exception here, the datePicker will use its own
+                // validator and display a default message. The value for that can be set by
+                // adding validatorMessage="#{bundle['retention.date.invalid']}" (a version with
+                // no params) to the datepicker
+                // element in file-edit-popup-fragment.html, but it would be better to catch all
+                // problems here (so we can show a message with the min/max dates).
+                FacesMessage msg = new FacesMessage(msgString);
+                msg.setSeverity(FacesMessage.SEVERITY_ERROR);
+                throw new ValidatorException(msg);
+            }
+        }
+    }
+
    Map<String, String> languageMap = null;

    public Map<String, String> getBaseMetadataLanguageMap(boolean refresh) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/Shib.java b/src/main/java/edu/harvard/iq/dataverse/Shib.java
index 24c0f9d7926..f9cf061e771 100644
--- a/src/main/java/edu/harvard/iq/dataverse/Shib.java
+++ b/src/main/java/edu/harvard/iq/dataverse/Shib.java
@@ -59,6 +59,8 @@ public class Shib implements java.io.Serializable {
    SettingsServiceBean settingsService;
    @EJB
    SystemConfig systemConfig;
+    @EJB
+    UserServiceBean userService;

    HttpServletRequest request;

@@ -259,6 +261,7 @@ else if (ShibAffiliationOrder.equals("firstAffiliation")) {
            state = State.REGULAR_LOGIN_INTO_EXISTING_SHIB_ACCOUNT;
            logger.fine("Found user based on " + userPersistentId + ". 
Logging in."); logger.fine("Updating display info for " + au.getName()); + userService.updateLastLogin(au); authSvc.updateAuthenticatedUser(au, displayInfo); logInUserAndSetShibAttributes(au); String prettyFacesHomePageString = getPrettyFacesHomePageString(false); diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 319ae06eefb..d31fdd4e380 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -6,24 +6,21 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.dataaccess.DataAccess; -import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; - -import static edu.harvard.iq.dataverse.dataset.DatasetUtil.datasetLogoThumbnail; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.search.SolrSearchResult; -import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; -import java.io.InputStream; -import java.util.Base64; import java.util.HashMap; import java.util.Map; +import java.util.logging.Logger; + import jakarta.ejb.EJB; import jakarta.enterprise.context.RequestScoped; -import jakarta.faces.view.ViewScoped; import jakarta.inject.Inject; import jakarta.inject.Named; -import org.apache.commons.io.IOUtils; /** * @@ -33,6 +30,9 @@ @RequestScoped @Named public class ThumbnailServiceWrapper implements java.io.Serializable { + + private static final Logger logger = Logger.getLogger(ThumbnailServiceWrapper.class.getCanonicalName()); + @Inject PermissionsWrapper permissionsWrapper; @EJB @@ -46,49 +46,7 @@ public class ThumbnailServiceWrapper implements java.io.Serializable { private Map dvobjectThumbnailsMap = new HashMap<>(); private Map dvobjectViewMap = new HashMap<>(); - - private String getAssignedDatasetImage(Dataset dataset, int size) { - if (dataset == null) { - return null; - } - - DataFile assignedThumbnailFile = dataset.getThumbnailFile(); - - if (assignedThumbnailFile != null) { - Long assignedThumbnailFileId = assignedThumbnailFile.getId(); - - if (this.dvobjectThumbnailsMap.containsKey(assignedThumbnailFileId)) { - // Yes, return previous answer - //logger.info("using cached result for ... "+assignedThumbnailFileId); - if (!"".equals(this.dvobjectThumbnailsMap.get(assignedThumbnailFileId))) { - return this.dvobjectThumbnailsMap.get(assignedThumbnailFileId); - } - return null; - } - - String imageSourceBase64 = ImageThumbConverter.getImageThumbnailAsBase64(assignedThumbnailFile, - size); - //ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); - - if (imageSourceBase64 != null) { - this.dvobjectThumbnailsMap.put(assignedThumbnailFileId, imageSourceBase64); - return imageSourceBase64; - } - - // OK - we can't use this "assigned" image, because of permissions, or because - // the thumbnail failed to generate, etc... in this case we'll - // mark this dataset in the lookup map - so that we don't have to - // do all these lookups again... - this.dvobjectThumbnailsMap.put(assignedThumbnailFileId, ""); - - // TODO: (?) - // do we need to cache this datafile object in the view map? 
- // -- L.A., 4.2.2 - } - - return null; - - } + private Map hasThumbMap = new HashMap<>(); // it's the responsibility of the user - to make sure the search result // passed to this method is of the Datafile type! @@ -130,7 +88,7 @@ public String getFileCardImageAsBase64Url(SolrSearchResult result) { if ((!((DataFile)result.getEntity()).isRestricted() || permissionsWrapper.hasDownloadFilePermission(result.getEntity())) - && dataFileService.isThumbnailAvailable((DataFile) result.getEntity())) { + && isThumbnailAvailable((DataFile) result.getEntity())) { cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( (DataFile) result.getEntity(), @@ -156,9 +114,16 @@ public String getFileCardImageAsBase64Url(SolrSearchResult result) { return null; } + public boolean isThumbnailAvailable(DataFile entity) { + if(!hasThumbMap.containsKey(entity.getId())) { + hasThumbMap.put(entity.getId(), dataFileService.isThumbnailAvailable(entity)); + } + return hasThumbMap.get(entity.getId()); + } + // it's the responsibility of the user - to make sure the search result // passed to this method is of the Dataset type! - public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { + public String getDatasetCardImageAsUrl(SolrSearchResult result) { // Before we do anything else, check if it's a harvested dataset; // no need to check anything else if so (harvested datasets never have // thumbnails) @@ -176,13 +141,14 @@ public String getDatasetCardImageAsBase64Url(SolrSearchResult result) { return null; } Dataset dataset = (Dataset)result.getEntity(); + dataset.setId(result.getEntityId()); Long versionId = result.getDatasetVersionId(); - return getDatasetCardImageAsBase64Url(dataset, versionId, result.isPublishedState(), ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); + return getDatasetCardImageAsUrl(dataset, versionId, result.isPublishedState(), ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); } - public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, boolean autoselect, int size) { + public String getDatasetCardImageAsUrl(Dataset dataset, Long versionId, boolean autoselect, int size) { Long datasetId = dataset.getId(); if (datasetId != null) { if (this.dvobjectThumbnailsMap.containsKey(datasetId)) { @@ -203,112 +169,34 @@ public String getDatasetCardImageAsBase64Url(Dataset dataset, Long versionId, bo this.dvobjectThumbnailsMap.put(datasetId, ""); return null; } - - String cardImageUrl = null; - StorageIO dataAccess = null; - - try{ - dataAccess = DataAccess.getStorageIO(dataset); - } - catch(IOException ioex){ - // ignore - } - - InputStream in = null; - // See if the dataset already has a dedicated thumbnail ("logo") saved as - // an auxilary file on the dataset level: - // (don't bother checking if it exists; just try to open the input stream) - try { - in = dataAccess.getAuxFileAsInputStream(datasetLogoThumbnail + ".thumb" + size); - //thumb48addedByImageThumbConverter); - } catch (Exception ioex) { - //ignore - } - - if (in != null) { - try { - byte[] bytes = IOUtils.toByteArray(in); - String base64image = Base64.getEncoder().encodeToString(bytes); - cardImageUrl = FileUtil.DATA_URI_SCHEME + base64image; - this.dvobjectThumbnailsMap.put(datasetId, cardImageUrl); - return cardImageUrl; - } catch (IOException ex) { - this.dvobjectThumbnailsMap.put(datasetId, ""); - return null; - // (alternatively, we could ignore the exception, and proceed with the - // regular process of selecting the thumbnail from the available - // image files - ?) 
- } finally - { - IOUtils.closeQuietly(in); - } - } - - // If not, see if the dataset has one of its image files already assigned - // to be the designated thumbnail: - cardImageUrl = this.getAssignedDatasetImage(dataset, size); - - if (cardImageUrl != null) { - //logger.info("dataset id " + result.getEntity().getId() + " has a dedicated image assigned; returning " + cardImageUrl); - return cardImageUrl; - } - - // And finally, try to auto-select the thumbnail (unless instructed not to): - - if (!autoselect) { - return null; - } - - // We attempt to auto-select via the optimized, native query-based method - // from the DatasetVersionService: - Long thumbnailImageFileId = datasetVersionService.getThumbnailByVersionId(versionId); - - if (thumbnailImageFileId != null) { - //cardImageUrl = FILE_CARD_IMAGE_URL + thumbnailImageFileId; - if (this.dvobjectThumbnailsMap.containsKey(thumbnailImageFileId)) { - // Yes, return previous answer - //logger.info("using cached result for ... "+datasetId); - if (!"".equals(this.dvobjectThumbnailsMap.get(thumbnailImageFileId))) { - return this.dvobjectThumbnailsMap.get(thumbnailImageFileId); - } - return null; - } + DataFile thumbnailFile = dataset.getThumbnailFile(); - DataFile thumbnailImageFile = null; + if (thumbnailFile == null) { - if (dvobjectViewMap.containsKey(thumbnailImageFileId) - && dvobjectViewMap.get(thumbnailImageFileId).isInstanceofDataFile()) { - thumbnailImageFile = (DataFile) dvobjectViewMap.get(thumbnailImageFileId); - } else { - thumbnailImageFile = dataFileService.findCheapAndEasy(thumbnailImageFileId); - if (thumbnailImageFile != null) { - // TODO: - // do we need this file on the map? - it may not even produce - // a thumbnail! - dvobjectViewMap.put(thumbnailImageFileId, thumbnailImageFile); - } else { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, ""); - return null; + boolean hasDatasetLogo = false; + StorageIO storageIO = null; + try { + storageIO = DataAccess.getStorageIO(dataset); + if (storageIO != null && storageIO.isAuxObjectCached(DatasetUtil.datasetLogoFilenameFinal)) { + // If not, return null/use the default, otherwise pass the logo URL + hasDatasetLogo = true; } + } catch (IOException ioex) { + logger.warning("getDatasetCardImageAsUrl(): Failed to initialize dataset StorageIO for " + + dataset.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); } - - if (dataFileService.isThumbnailAvailable(thumbnailImageFile)) { - cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( - thumbnailImageFile, - size); - //ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); - } - - if (cardImageUrl != null) { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, cardImageUrl); - } else { - this.dvobjectThumbnailsMap.put(thumbnailImageFileId, ""); + // If no other logo we attempt to auto-select via the optimized, native + // query-based method + // from the DatasetVersionService: + if (!hasDatasetLogo && datasetVersionService.getThumbnailByVersionId(versionId) == null) { + return null; } } - - //logger.info("dataset id " + result.getEntityId() + ", returning " + cardImageUrl); - - return cardImageUrl; + String url = SystemConfig.getDataverseSiteUrlStatic() + "/api/datasets/" + dataset.getId() + "/logo"; + logger.fine("getDatasetCardImageAsUrl: " + url); + this.dvobjectThumbnailsMap.put(datasetId,url); + return url; + } // it's the responsibility of the user - to make sure the search result @@ -320,6 +208,7 @@ public String getDataverseCardImageAsBase64Url(SolrSearchResult result) { public void resetObjectMaps() { 
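        // Reset all request-scoped caches: thumbnail URLs, DvObject lookups, and thumbnail-availability flags.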
dvobjectThumbnailsMap = new HashMap<>(); dvobjectViewMap = new HashMap<>(); + hasThumbMap = new HashMap<>(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/UserServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/UserServiceBean.java index 93892376edc..d63fcfa3e34 100644 --- a/src/main/java/edu/harvard/iq/dataverse/UserServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/UserServiceBean.java @@ -147,6 +147,8 @@ private AuthenticatedUser createAuthenticatedUserForView (Object[] dbRowValues, user.setMutedEmails(Type.tokenizeToSet((String) dbRowValues[15])); user.setMutedNotifications(Type.tokenizeToSet((String) dbRowValues[15])); + user.setRateLimitTier((int)dbRowValues[17]); + user.setRoles(roles); return user; } @@ -419,7 +421,7 @@ private List getUserListCore(String searchTerm, qstr += " u.createdtime, u.lastlogintime, u.lastapiusetime, "; qstr += " prov.id, prov.factoryalias, "; qstr += " u.deactivated, u.deactivatedtime, "; - qstr += " u.mutedEmails, u.mutedNotifications "; + qstr += " u.mutedEmails, u.mutedNotifications, u.rateLimitTier "; qstr += " FROM authenticateduser u,"; qstr += " authenticateduserlookup prov_lookup,"; qstr += " authenticationproviderrow prov"; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index 0a0861fa1c9..19df6d8c1c7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -1,30 +1,8 @@ package edu.harvard.iq.dataverse.api; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetLinkingDataverse; -import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersionServiceBean; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseLinkingDataverse; -import edu.harvard.iq.dataverse.DataverseLinkingServiceBean; -import edu.harvard.iq.dataverse.DataverseRoleServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.DvObjectServiceBean; -import edu.harvard.iq.dataverse.EjbDataverseEngine; -import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; -import edu.harvard.iq.dataverse.MetadataBlock; -import edu.harvard.iq.dataverse.MetadataBlockServiceBean; -import edu.harvard.iq.dataverse.PermissionServiceBean; -import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; -import edu.harvard.iq.dataverse.UserNotificationServiceBean; -import edu.harvard.iq.dataverse.UserServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.actionlogging.ActionLogServiceBean; +import static edu.harvard.iq.dataverse.api.Datasets.handleVersion; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.RoleAssignee; @@ -38,44 +16,50 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import 
edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetLatestAccessibleDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.exception.RateLimitCommandException; import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean; import edu.harvard.iq.dataverse.license.LicenseServiceBean; -import edu.harvard.iq.dataverse.metrics.MetricsServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.locality.StorageSiteServiceBean; +import edu.harvard.iq.dataverse.metrics.MetricsServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonParser; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; -import java.io.StringReader; -import java.net.URI; -import java.util.Arrays; -import java.util.Collections; -import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.logging.Level; -import java.util.logging.Logger; import jakarta.ejb.EJB; import jakarta.ejb.EJBException; -import jakarta.json.Json; -import jakarta.json.JsonArray; -import jakarta.json.JsonArrayBuilder; -import jakarta.json.JsonObject; -import jakarta.json.JsonObjectBuilder; -import jakarta.json.JsonReader; -import jakarta.json.JsonValue; +import jakarta.json.*; import jakarta.json.JsonValue.ValueType; import jakarta.persistence.EntityManager; import jakarta.persistence.NoResultException; import jakarta.persistence.PersistenceContext; import jakarta.servlet.http.HttpServletRequest; +import jakarta.validation.constraints.NotNull; import jakarta.ws.rs.container.ContainerRequestContext; -import jakarta.ws.rs.core.*; +import jakarta.ws.rs.core.Context; +import jakarta.ws.rs.core.MediaType; +import jakarta.ws.rs.core.Response; import jakarta.ws.rs.core.Response.ResponseBuilder; import jakarta.ws.rs.core.Response.Status; +import java.io.InputStream; +import java.net.URI; +import java.util.Arrays; +import java.util.Collections; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.logging.Level; +import java.util.logging.Logger; + import static org.apache.commons.lang3.StringUtils.isNumeric; /** @@ -132,23 +116,21 @@ public Response refineResponse( String message ) { * In the common case of the wrapped response being of type JSON, * return the message field it has (if any). * @return the content of a message field, or {@code null}. + * @throws JsonException when JSON parsing fails. */ String getWrappedMessageWhenJson() { if ( response.getMediaType().equals(MediaType.APPLICATION_JSON_TYPE) ) { Object entity = response.getEntity(); if ( entity == null ) return null; - String json = entity.toString(); - try ( StringReader rdr = new StringReader(json) ){ - JsonReader jrdr = Json.createReader(rdr); - JsonObject obj = jrdr.readObject(); - if ( obj.containsKey("message") ) { - JsonValue message = obj.get("message"); - return message.getValueType() == ValueType.STRING ? 
obj.getString("message") : message.toString(); - } else { - return null; - } + JsonObject obj = JsonUtil.getJsonObject(entity.toString()); + if ( obj.containsKey("message") ) { + JsonValue message = obj.get("message"); + return message.getValueType() == ValueType.STRING ? obj.getString("message") : message.toString(); + } else { + return null; } + } else { return null; } @@ -390,6 +372,11 @@ protected DataverseLinkingDataverse findDataverseLinkingDataverseOrDie(String da } protected Dataset findDatasetOrDie(String id) throws WrappedResponse { + return findDatasetOrDie(id, false); + } + + protected Dataset findDatasetOrDie(String id, boolean deep) throws WrappedResponse { + Long datasetId; Dataset dataset; if (id.equals(PERSISTENT_ID_KEY)) { String persistentId = getRequestParameter(PERSISTENT_ID_KEY.substring(1)); @@ -397,28 +384,67 @@ protected Dataset findDatasetOrDie(String id) throws WrappedResponse { throw new WrappedResponse( badRequest(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1))))); } - dataset = datasetSvc.findByGlobalId(persistentId); - if (dataset == null) { - throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset.not.found.persistentId", Collections.singletonList(persistentId)))); + GlobalId globalId; + try { + globalId = PidUtil.parseAsGlobalID(persistentId); + } catch (IllegalArgumentException e) { + throw new WrappedResponse( + badRequest(BundleUtil.getStringFromBundle("find.dataset.error.dataset.not.found.bad.id", Collections.singletonList(persistentId)))); + } + datasetId = dvObjSvc.findIdByGlobalId(globalId, DvObject.DType.Dataset); + if (datasetId == null) { + datasetId = dvObjSvc.findIdByAltGlobalId(globalId, DvObject.DType.Dataset); + } + if (datasetId == null) { + throw new WrappedResponse( + notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1))))); } - return dataset; - } else { try { - dataset = datasetSvc.find(Long.parseLong(id)); - if (dataset == null) { - throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset.not.found.id", Collections.singletonList(id)))); - } - return dataset; + datasetId = Long.parseLong(id); } catch (NumberFormatException nfe) { throw new WrappedResponse( badRequest(BundleUtil.getStringFromBundle("find.dataset.error.dataset.not.found.bad.id", Collections.singletonList(id)))); } } + if (deep) { + dataset = datasetSvc.findDeep(datasetId); + } else { + dataset = datasetSvc.find(datasetId); + } + if (dataset == null) { + throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset.not.found.id", Collections.singletonList(id)))); + } + return dataset; } - + + protected DatasetVersion findDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, boolean includeDeaccessioned, boolean checkPermsWhenDeaccessioned) throws WrappedResponse { + DatasetVersion dsv = execCommand(handleVersion(versionNumber, new Datasets.DsVersionHandler>() { + + @Override + public Command handleLatest() { + return new GetLatestAccessibleDatasetVersionCommand(req, ds, includeDeaccessioned, checkPermsWhenDeaccessioned); + } + + @Override + public Command handleDraft() { + return new GetDraftDatasetVersionCommand(req, ds); + } + + @Override + public Command handleSpecific(long major, long minor) { + return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, 
minor, includeDeaccessioned, checkPermsWhenDeaccessioned); + } + + @Override + public Command handleLatestPublished() { + return new GetLatestPublishedDatasetVersionCommand(req, ds, includeDeaccessioned, checkPermsWhenDeaccessioned); + } + })); + return dsv; + } + protected DataFile findDataFileOrDie(String id) throws WrappedResponse { - DataFile datafile; if (id.equals(PERSISTENT_ID_KEY)) { String persistentId = getRequestParameter(PERSISTENT_ID_KEY.substring(1)); @@ -526,17 +552,21 @@ protected DvObject findDvo( Long id ) { * with that alias. If that fails, tries to get a {@link Dataset} with that global id. * @param id a value identifying the DvObject, either numeric of textual. * @return A DvObject, or {@code null} + * @throws WrappedResponse */ - protected DvObject findDvo( String id ) { - if ( isNumeric(id) ) { - return findDvo( Long.valueOf(id)) ; + @NotNull + protected DvObject findDvo(@NotNull final String id) throws WrappedResponse { + DvObject d = null; + if (isNumeric(id)) { + d = findDvo(Long.valueOf(id)); } else { - Dataverse d = dataverseSvc.findByAlias(id); - return ( d != null ) ? - d : datasetSvc.findByGlobalId(id); - + d = dataverseSvc.findByAlias(id); } - } + if (d == null) { + return findDatasetOrDie(id); + } + return d; + } protected T failIfNull( T t, String errorMessage ) throws WrappedResponse { if ( t != null ) return t; @@ -571,6 +601,8 @@ protected T execCommand( Command cmd ) throws WrappedResponse { try { return engineSvc.submit(cmd); + } catch (RateLimitCommandException ex) { + throw new WrappedResponse(rateLimited(ex.getMessage())); } catch (IllegalCommandException ex) { //for 8859 for api calls that try to update datasets with TOA out of compliance if (ex.getMessage().toLowerCase().contains("terms of use")){ @@ -661,7 +693,15 @@ protected Response ok( JsonArrayBuilder bld ) { .add("data", bld).build()) .type(MediaType.APPLICATION_JSON).build(); } - + + protected Response ok( JsonArrayBuilder bld , long totalCount) { + return Response.ok(Json.createObjectBuilder() + .add("status", ApiConstants.STATUS_OK) + .add("totalCount", totalCount) + .add("data", bld).build()) + .type(MediaType.APPLICATION_JSON).build(); + } + protected Response ok( JsonArray ja ) { return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) @@ -728,6 +768,11 @@ protected Response ok(String data, MediaType mediaType, String downloadFilename) return res.build(); } + protected Response ok(InputStream inputStream) { + ResponseBuilder res = Response.ok().entity(inputStream).type(MediaType.valueOf(FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT)); + return res.build(); + } + protected Response created( String uri, JsonObjectBuilder bld ) { return Response.created( URI.create(uri) ) .entity( Json.createObjectBuilder() @@ -759,11 +804,15 @@ protected Response notFound( String msg ) { protected Response badRequest( String msg ) { return error( Status.BAD_REQUEST, msg ); } - + protected Response forbidden( String msg ) { return error( Status.FORBIDDEN, msg ); } - + + protected Response rateLimited( String msg ) { + return error( Status.TOO_MANY_REQUESTS, msg ); + } + protected Response conflict( String msg ) { return error( Status.CONFLICT, msg ); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Access.java b/src/main/java/edu/harvard/iq/dataverse/api/Access.java index 696fcb34920..00da4990996 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Access.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Access.java @@ -47,6 +47,7 @@ import 
edu.harvard.iq.dataverse.dataaccess.DataAccessRequest; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.dataaccess.DataFileZipper; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.OptionalAccessService; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.datavariable.DataVariable; @@ -129,6 +130,14 @@ import jakarta.ws.rs.core.MediaType; import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED; + +import org.eclipse.microprofile.openapi.annotations.Operation; +import org.eclipse.microprofile.openapi.annotations.media.Content; +import org.eclipse.microprofile.openapi.annotations.media.Schema; +import org.eclipse.microprofile.openapi.annotations.parameters.RequestBody; +import org.eclipse.microprofile.openapi.annotations.responses.APIResponse; +import org.eclipse.microprofile.openapi.annotations.responses.APIResponses; +import org.eclipse.microprofile.openapi.annotations.tags.Tag; import org.glassfish.jersey.media.multipart.FormDataBodyPart; import org.glassfish.jersey.media.multipart.FormDataParam; @@ -328,8 +337,8 @@ public Response datafile(@Context ContainerRequestContext crc, @PathParam("fileI dInfo.addServiceAvailable(new OptionalAccessService("preprocessed", "application/json", "format=prep", "Preprocessed data in JSON")); dInfo.addServiceAvailable(new OptionalAccessService("subset", "text/tab-separated-values", "variables=<LIST>", "Column-wise Subsetting")); } - - if(systemConfig.isGlobusFileDownload() && systemConfig.getGlobusStoresList().contains(DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()))) { + String driverId = DataAccess.getStorageDriverFromIdentifier(df.getStorageIdentifier()); + if(systemConfig.isGlobusFileDownload() && (GlobusAccessibleStore.acceptsGlobusTransfers(driverId) || GlobusAccessibleStore.allowsGlobusReferences(driverId))) { dInfo.addServiceAvailable(new OptionalAccessService("GlobusTransfer", df.getContentType(), "format=GlobusTransfer", "Download via Globus")); } @@ -465,7 +474,9 @@ public String tabularDatafileMetadataDDI(@Context ContainerRequestContext crc, @ if (!dataFile.isTabularData()) { throw new BadRequestException("tabular data required"); } - + if (FileUtil.isRetentionExpired(dataFile)) { + throw new BadRequestException("unable to download file with expired retention"); + } if (dataFile.isRestricted() || FileUtil.isActivelyEmbargoed(dataFile)) { boolean hasPermissionToDownloadFile = false; DataverseRequest dataverseRequest; @@ -920,14 +931,15 @@ public void write(OutputStream os) throws IOException, } } else { boolean embargoed = FileUtil.isActivelyEmbargoed(file); - if (file.isRestricted() || embargoed) { + boolean retentionExpired = FileUtil.isRetentionExpired(file); + if (file.isRestricted() || embargoed || retentionExpired) { if (zipper == null) { fileManifest = fileManifest + file.getFileMetadata().getLabel() + " IS " - + (embargoed ? "EMBARGOED" : "RESTRICTED") + + (embargoed ? "EMBARGOED" : retentionExpired ? "RETENTIONEXPIRED" : "RESTRICTED") + " AND CANNOT BE DOWNLOADED\r\n"; } else { zipper.addToManifest(file.getFileMetadata().getLabel() + " IS " - + (embargoed ? "EMBARGOED" : "RESTRICTED") + + (embargoed ? "EMBARGOED" : retentionExpired ? 
"RETENTIONEXPIRED" : "RESTRICTED") + " AND CANNOT BE DOWNLOADED\r\n"); } } else { @@ -1244,6 +1256,20 @@ private String getWebappImageResource(String imageName) { @AuthRequired @Path("datafile/{fileId}/auxiliary/{formatTag}/{formatVersion}") @Consumes(MediaType.MULTIPART_FORM_DATA) + @Produces("application/json") + @Operation(summary = "Save auxiliary file with version", + description = "Saves an auxiliary file") + @APIResponses(value = { + @APIResponse(responseCode = "200", + description = "File saved response"), + @APIResponse(responseCode = "403", + description = "User not authorized to edit the dataset."), + @APIResponse(responseCode = "400", + description = "File not found based on id.") + }) + @Tag(name = "saveAuxiliaryFileWithVersion", + description = "Save Auxiliary File With Version") + @RequestBody(content = @Content(mediaType = MediaType.MULTIPART_FORM_DATA)) public Response saveAuxiliaryFileWithVersion(@Context ContainerRequestContext crc, @PathParam("fileId") Long fileId, @PathParam("formatTag") String formatTag, @@ -1401,6 +1427,10 @@ public Response requestFileAccess(@Context ContainerRequestContext crc, @PathPar return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.requestAccess.fileNotFound", args)); } + if (FileUtil.isRetentionExpired(dataFile)) { + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.requestAccess.failure.retentionExpired")); + } + if (!dataFile.getOwner().isFileAccessRequest()) { return error(BAD_REQUEST, BundleUtil.getStringFromBundle("access.api.requestAccess.requestsNotAccepted")); } @@ -1734,8 +1764,11 @@ private boolean isAccessAuthorized(User requestUser, DataFile df) { //True if there's an embargo that hasn't yet expired //In this state, we block access as though the file is restricted (even if it is not restricted) boolean embargoed = FileUtil.isActivelyEmbargoed(df); - - + // access is also blocked for retention expired files + boolean retentionExpired = FileUtil.isRetentionExpired(df); + // No access ever if retention is expired + if(retentionExpired) return false; + /* SEK 7/26/2018 for 3661 relying on the version state of the dataset versions to which this file is attached check to see if at least one is RELEASED @@ -1800,7 +1833,7 @@ private boolean isAccessAuthorized(User requestUser, DataFile df) { //The one case where we don't need to check permissions - if (!restricted && !embargoed && published) { + if (!restricted && !embargoed && !retentionExpired && published) { // If they are not published, they can still be downloaded, if the user // has the permission to view unpublished versions! 
(this case will // be handled below) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index fd3b9a89e54..d60884bad2f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -14,11 +14,12 @@ import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DataverseSession; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.DvObjectServiceBean; import edu.harvard.iq.dataverse.api.auth.AuthRequired; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.validation.EMailValidator; import edu.harvard.iq.dataverse.EjbDataverseEngine; -import edu.harvard.iq.dataverse.HandlenetServiceBean; import edu.harvard.iq.dataverse.Template; import edu.harvard.iq.dataverse.TemplateServiceBean; import edu.harvard.iq.dataverse.UserServiceBean; @@ -96,8 +97,8 @@ import edu.harvard.iq.dataverse.engine.command.impl.DeleteRoleCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteTemplateCommand; import edu.harvard.iq.dataverse.engine.command.impl.RegisterDvObjectCommand; -import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.userdata.UserListMaker; import edu.harvard.iq.dataverse.userdata.UserListResult; @@ -105,8 +106,10 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; +import java.io.FileInputStream; import java.io.IOException; import java.io.OutputStream; @@ -122,6 +125,7 @@ import jakarta.ws.rs.QueryParam; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.StreamingOutput; +import java.nio.file.Paths; /** * Where the secure, setup API calls live. 
@@ -134,46 +138,48 @@ public class Admin extends AbstractApiBean { private static final Logger logger = Logger.getLogger(Admin.class.getName()); - @EJB - AuthenticationProvidersRegistrationServiceBean authProvidersRegistrationSvc; - @EJB - BuiltinUserServiceBean builtinUserService; - @EJB - ShibServiceBean shibService; - @EJB - AuthTestDataServiceBean authTestDataService; - @EJB - UserServiceBean userService; - @EJB - IngestServiceBean ingestService; - @EJB - DataFileServiceBean fileService; - @EJB - DatasetServiceBean datasetService; - @EJB - DataverseServiceBean dataverseService; - @EJB - DatasetVersionServiceBean datasetversionService; - @Inject - DataverseRequestServiceBean dvRequestService; - @EJB - EjbDataverseEngine commandEngine; - @EJB - GroupServiceBean groupService; - @EJB - SettingsServiceBean settingsService; - @EJB - DatasetVersionServiceBean datasetVersionService; - @EJB - ExplicitGroupServiceBean explicitGroupService; - @EJB - BannerMessageServiceBean bannerMessageService; - @EJB - TemplateServiceBean templateService; - - // Make the session available - @Inject - DataverseSession session; + @EJB + AuthenticationProvidersRegistrationServiceBean authProvidersRegistrationSvc; + @EJB + BuiltinUserServiceBean builtinUserService; + @EJB + ShibServiceBean shibService; + @EJB + AuthTestDataServiceBean authTestDataService; + @EJB + UserServiceBean userService; + @EJB + IngestServiceBean ingestService; + @EJB + DataFileServiceBean fileService; + @EJB + DatasetServiceBean datasetService; + @EJB + DataverseServiceBean dataverseService; + @EJB + DvObjectServiceBean dvObjectService; + @EJB + DatasetVersionServiceBean datasetversionService; + @Inject + DataverseRequestServiceBean dvRequestService; + @EJB + EjbDataverseEngine commandEngine; + @EJB + GroupServiceBean groupService; + @EJB + SettingsServiceBean settingsService; + @EJB + DatasetVersionServiceBean datasetVersionService; + @EJB + ExplicitGroupServiceBean explicitGroupService; + @EJB + BannerMessageServiceBean bannerMessageService; + @EJB + TemplateServiceBean templateService; + + // Make the session available + @Inject + DataverseSession session; public static final String listUsersPartialAPIPath = "list-users"; public static final String listUsersFullAPIPath = "/api/admin/" + listUsersPartialAPIPath; @@ -195,7 +201,7 @@ public Response putSetting(@PathParam("name") String name, String content) { @Path("settings/{name}/lang/{lang}") @PUT - public Response putSetting(@PathParam("name") String name, @PathParam("lang") String lang, String content) { + public Response putSettingLang(@PathParam("name") String name, @PathParam("lang") String lang, String content) { Setting s = settingsSvc.set(name, lang, content); return ok("Setting " + name + " - " + lang + " - added."); } @@ -218,7 +224,7 @@ public Response deleteSetting(@PathParam("name") String name) { @Path("settings/{name}/lang/{lang}") @DELETE - public Response deleteSetting(@PathParam("name") String name, @PathParam("lang") String lang) { + public Response deleteSettingLang(@PathParam("name") String name, @PathParam("lang") String lang) { settingsSvc.delete(name, lang); return ok("Setting " + name + " - " + lang + " deleted."); } @@ -1024,29 +1030,49 @@ public Response deleteRole(@Context ContainerRequestContext crc, @PathParam("id" }, getRequestUser(crc)); } - @Path("superuser/{identifier}") - @POST - public Response toggleSuperuser(@PathParam("identifier") String identifier) { - ActionLogRecord alr = new ActionLogRecord(ActionLogRecord.ActionType.Admin, "toggleSuperuser") - 
.setInfo(identifier); - try { - AuthenticatedUser user = authSvc.getAuthenticatedUser(identifier); - if (user.isDeactivated()) { - return error(Status.BAD_REQUEST, "You cannot make a deactivated user a superuser."); - } + @Path("superuser/{identifier}") + @Deprecated + @POST + public Response toggleSuperuser(@PathParam("identifier") String identifier) { + ActionLogRecord alr = new ActionLogRecord(ActionLogRecord.ActionType.Admin, "toggleSuperuser") + .setInfo(identifier); + try { + final AuthenticatedUser user = authSvc.getAuthenticatedUser(identifier); + return setSuperuserStatus(user, !user.isSuperuser()); + } catch (Exception e) { + alr.setActionResult(ActionLogRecord.Result.InternalError); + alr.setInfo(alr.getInfo() + "// " + e.getMessage()); + return error(Response.Status.INTERNAL_SERVER_ERROR, e.getMessage()); + } finally { + actionLogSvc.log(alr); + } + } - user.setSuperuser(!user.isSuperuser()); + private Response setSuperuserStatus(AuthenticatedUser user, Boolean isSuperuser) { + if (user.isDeactivated()) { + return error(Status.BAD_REQUEST, "You cannot make a deactivated user a superuser."); + } + user.setSuperuser(isSuperuser); + return ok("User " + user.getIdentifier() + " " + (user.isSuperuser() ? "set" : "removed") + + " as a superuser."); + } - return ok("User " + user.getIdentifier() + " " + (user.isSuperuser() ? "set" : "removed") - + " as a superuser."); - } catch (Exception e) { - alr.setActionResult(ActionLogRecord.Result.InternalError); - alr.setInfo(alr.getInfo() + "// " + e.getMessage()); - return error(Response.Status.INTERNAL_SERVER_ERROR, e.getMessage()); - } finally { - actionLogSvc.log(alr); - } - } + @Path("superuser/{identifier}") + @PUT + // Using string instead of boolean so user doesn't need to add a Content-type header in their request + public Response setSuperuserStatus(@PathParam("identifier") String identifier, String isSuperuser) { + ActionLogRecord alr = new ActionLogRecord(ActionLogRecord.ActionType.Admin, "setSuperuserStatus") + .setInfo(identifier + ":" + isSuperuser); + try { + return setSuperuserStatus(authSvc.getAuthenticatedUser(identifier), StringUtil.isTrue(isSuperuser)); + } catch (Exception e) { + alr.setActionResult(ActionLogRecord.Result.InternalError); + alr.setInfo(alr.getInfo() + "// " + e.getMessage()); + return error(Response.Status.INTERNAL_SERVER_ERROR, e.getMessage()); + } finally { + actionLogSvc.log(alr); + } + } @GET @Path("validate/datasets") @@ -1327,26 +1353,24 @@ public Response convertUserFromBcryptToSha1(String json) { } - @Path("permissions/{dvo}") - @AuthRequired - @GET - public Response findPermissonsOn(@Context ContainerRequestContext crc, @PathParam("dvo") String dvo) { - try { - DvObject dvObj = findDvo(dvo); - if (dvObj == null) { - return notFound("DvObject " + dvo + " not found"); - } - User aUser = getRequestUser(crc); - JsonObjectBuilder bld = Json.createObjectBuilder(); - bld.add("user", aUser.getIdentifier()); - bld.add("permissions", json(permissionSvc.permissionsFor(createDataverseRequest(aUser), dvObj))); - return ok(bld); - - } catch (Exception e) { - logger.log(Level.SEVERE, "Error while testing permissions", e); - return error(Response.Status.INTERNAL_SERVER_ERROR, e.getMessage()); - } - } + @Path("permissions/{dvo}") + @AuthRequired + @GET + public Response findPermissonsOn(@Context final ContainerRequestContext crc, @PathParam("dvo") final String dvo) { + try { + final DvObject dvObj = findDvo(dvo); + final User aUser = getRequestUser(crc); + final JsonObjectBuilder bld = Json.createObjectBuilder(); 
+ bld.add("user", aUser.getIdentifier()); + bld.add("permissions", json(permissionSvc.permissionsFor(createDataverseRequest(aUser), dvObj))); + return ok(bld); + } catch (WrappedResponse r) { + return r.getResponse(); + } catch (Exception e) { + logger.log(Level.SEVERE, "Error while testing permissions", e); + return error(Response.Status.INTERNAL_SERVER_ERROR, e.getMessage()); + } + } @Path("assignee/{idtf}") @GET @@ -1472,10 +1496,7 @@ public Response isOrcidEnabled() { public Response reregisterHdlToPID(@Context ContainerRequestContext crc, @PathParam("id") String id) { logger.info("Starting to reregister " + id + " Dataset Id. (from hdl to doi)" + new Date()); try { - if (settingsSvc.get(SettingsServiceBean.Key.Protocol.toString()).equals(HandlenetServiceBean.HDL_PROTOCOL)) { - logger.info("Bad Request protocol set to handle " ); - return error(Status.BAD_REQUEST, BundleUtil.getStringFromBundle("admin.api.migrateHDL.failure.must.be.set.for.doi")); - } + User u = getRequestUser(crc); if (!u.isSuperuser()) { @@ -1485,7 +1506,12 @@ public Response reregisterHdlToPID(@Context ContainerRequestContext crc, @PathPa DataverseRequest r = createDataverseRequest(u); Dataset ds = findDatasetOrDie(id); - if (ds.getIdentifier() != null && !ds.getIdentifier().isEmpty() && ds.getProtocol().equals(HandlenetServiceBean.HDL_PROTOCOL)) { + + if (HandlePidProvider.HDL_PROTOCOL.equals(dvObjectService.getEffectivePidGenerator(ds).getProtocol())) { + logger.info("Bad Request protocol set to handle " ); + return error(Status.BAD_REQUEST, BundleUtil.getStringFromBundle("admin.api.migrateHDL.failure.must.be.set.for.doi")); + } + if (ds.getIdentifier() != null && !ds.getIdentifier().isEmpty() && ds.getProtocol().equals(HandlePidProvider.HDL_PROTOCOL)) { execCommand(new RegisterDvObjectCommand(r, ds, true)); } else { return error(Status.BAD_REQUEST, BundleUtil.getStringFromBundle("admin.api.migrateHDL.failure.must.be.hdl.dataset")); @@ -2311,6 +2337,7 @@ public Response addBannerMessage(JsonObject jsonObject) throws WrappedResponse { BannerMessage toAdd = new BannerMessage(); try { + String dismissible = jsonObject.getString("dismissibleByUser"); boolean dismissibleByUser = false; @@ -2331,12 +2358,17 @@ public Response addBannerMessage(JsonObject jsonObject) throws WrappedResponse { messageText.setBannerMessage(toAdd); toAdd.getBannerMessageTexts().add(messageText); } - bannerMessageService.save(toAdd); - return ok("Banner Message added successfully."); + bannerMessageService.save(toAdd); + + JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder() + .add("message", "Banner Message added successfully.") + .add("id", toAdd.getId()); + + return ok(jsonObjectBuilder); } catch (Exception e) { logger.warning("Unexpected Exception: " + e.getMessage()); - return error(Status.BAD_REQUEST, "Add Banner Message unexpected exception: " + e.getMessage()); + return error(Status.BAD_REQUEST, "Add Banner Message unexpected exception: invalid or missing JSON object."); } } @@ -2372,10 +2404,19 @@ public Response deactivateBannerMessage(@PathParam("id") Long id) throws Wrapped @Path("/bannerMessage") public Response getBannerMessages(@PathParam("id") Long id) throws WrappedResponse { - return ok(bannerMessageService.findAllBannerMessages().stream() - .map(m -> jsonObjectBuilder().add("id", m.getId()).add("displayValue", m.getDisplayValue())) - .collect(toJsonArray())); + List messagesList = bannerMessageService.findAllBannerMessages(); + for (BannerMessage message : messagesList) { + if 
("".equals(message.getDisplayValue())) { + return error(Response.Status.INTERNAL_SERVER_ERROR, "No banner messages found for this locale."); + } + } + + JsonArrayBuilder messages = messagesList.stream() + .map(m -> jsonObjectBuilder().add("id", m.getId()).add("displayValue", m.getDisplayValue())) + .collect(toJsonArray()); + + return ok(messages); } @POST @@ -2418,12 +2459,60 @@ public Response getSignedUrl(@Context ContainerRequestContext crc, JsonObject ur } String baseUrl = urlInfo.getString("url"); - int timeout = urlInfo.getInt(ExternalToolHandler.TIMEOUT, 10); - String method = urlInfo.getString(ExternalToolHandler.HTTP_METHOD, "GET"); + int timeout = urlInfo.getInt(URLTokenUtil.TIMEOUT, 10); + String method = urlInfo.getString(URLTokenUtil.HTTP_METHOD, "GET"); String signedUrl = UrlSignerUtil.signUrl(baseUrl, timeout, userId, method, key); - return ok(Json.createObjectBuilder().add(ExternalToolHandler.SIGNED_URL, signedUrl)); + return ok(Json.createObjectBuilder().add(URLTokenUtil.SIGNED_URL, signedUrl)); } + @DELETE + @Path("/clearThumbnailFailureFlag") + public Response clearThumbnailFailureFlag() { + em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE").executeUpdate(); + return ok("Thumbnail Failure Flags cleared."); + } + + @DELETE + @Path("/clearThumbnailFailureFlag/{id}") + public Response clearThumbnailFailureFlagByDatafile(@PathParam("id") String fileId) { + try { + DataFile df = findDataFileOrDie(fileId); + Query deleteQuery = em.createNativeQuery("UPDATE dvobject SET previewimagefail = FALSE where id = ?"); + deleteQuery.setParameter(1, df.getId()); + deleteQuery.executeUpdate(); + return ok("Thumbnail Failure Flag cleared for file id=: " + df.getId() + "."); + } catch (WrappedResponse r) { + logger.info("Could not find file with the id: " + fileId); + return error(Status.BAD_REQUEST, "Could not find file with the id: " + fileId); + } + } + + /** + * For testing only. Download a file from /tmp. 
+ */ + @GET + @AuthRequired + @Path("/downloadTmpFile") + public Response downloadTmpFile(@Context ContainerRequestContext crc, @QueryParam("fullyQualifiedPathToFile") String fullyQualifiedPathToFile) { + try { + AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc); + if (!user.isSuperuser()) { + return error(Response.Status.FORBIDDEN, "Superusers only."); + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + java.nio.file.Path normalizedPath = Paths.get(fullyQualifiedPathToFile).normalize(); + if (!normalizedPath.toString().startsWith("/tmp")) { + return error(Status.BAD_REQUEST, "Path must begin with '/tmp' but after normalization was '" + normalizedPath +"'."); + } + try { + return ok(new FileInputStream(fullyQualifiedPathToFile)); + } catch (IOException ex) { + return error(Status.BAD_REQUEST, ex.toString()); + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/BatchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/BatchServiceBean.java index daddc447117..fda698d6f5c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/BatchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/BatchServiceBean.java @@ -46,8 +46,8 @@ public void processFilePath(String fileDir, String parentIdtf, DataverseRequest SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd_HH:mm:ss"); - validationLog = new PrintWriter(new FileWriter( "../logs/validationLog"+ formatter.format(timestamp)+".txt")); - cleanupLog = new PrintWriter(new FileWriter( "../logs/cleanupLog"+ formatter.format(timestamp)+".txt")); + validationLog = new PrintWriter(new FileWriter( System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "validationLog"+ formatter.format(timestamp)+".txt")); + cleanupLog = new PrintWriter(new FileWriter( System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "cleanupLog"+ formatter.format(timestamp)+".txt")); File dir = new File(fileDir); if (dir.isDirectory()) { for (File file : dir.listFiles()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/BuiltinUsers.java b/src/main/java/edu/harvard/iq/dataverse/api/BuiltinUsers.java index 50862bc0d35..ba99cf33c5b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/BuiltinUsers.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/BuiltinUsers.java @@ -119,7 +119,7 @@ public Response create(BuiltinUser user, @PathParam("password") String password, */ @POST @Path("{password}/{key}/{sendEmailNotification}") - public Response create(BuiltinUser user, @PathParam("password") String password, @PathParam("key") String key, @PathParam("sendEmailNotification") Boolean sendEmailNotification) { + public Response createWithNotification(BuiltinUser user, @PathParam("password") String password, @PathParam("key") String key, @PathParam("sendEmailNotification") Boolean sendEmailNotification) { return internalSave(user, password, key, sendEmailNotification); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java index 00b7dfa6e36..01c51dc2b4c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DatasetFieldServiceApi.java @@ -24,7 +24,6 @@ import jakarta.ejb.EJBException; import jakarta.json.Json; import jakarta.json.JsonArrayBuilder; -import jakarta.validation.ConstraintViolation; import jakarta.validation.ConstraintViolationException; import 
jakarta.ws.rs.Consumes; import jakarta.ws.rs.GET; @@ -488,9 +487,7 @@ private String parseControlledVocabulary(String[] values) { @Consumes("application/zip") @Path("loadpropertyfiles") public Response loadLanguagePropertyFile(File inputFile) { - try - { - ZipFile file = new ZipFile(inputFile); + try (ZipFile file = new ZipFile(inputFile)) { //Get file entries Enumeration<? extends ZipEntry> entries = file.entries(); @@ -502,20 +499,26 @@ public Response loadLanguagePropertyFile(File inputFile) { { ZipEntry entry = entries.nextElement(); String dataverseLangFileName = dataverseLangDirectory + "/" + entry.getName(); - FileOutputStream fileOutput = new FileOutputStream(dataverseLangFileName); + File entryFile = new File(dataverseLangFileName); + String canonicalPath = entryFile.getCanonicalPath(); + if (canonicalPath.startsWith(dataverseLangDirectory + "/")) { + try (FileOutputStream fileOutput = new FileOutputStream(dataverseLangFileName)) { - InputStream is = file.getInputStream(entry); - BufferedInputStream bis = new BufferedInputStream(is); + InputStream is = file.getInputStream(entry); + BufferedInputStream bis = new BufferedInputStream(is); - while (bis.available() > 0) { - fileOutput.write(bis.read()); + while (bis.available() > 0) { + fileOutput.write(bis.read()); + } + } + } else { + logger.log(Level.SEVERE, "Zip Slip prevented: uploaded zip file tried to write to {}", canonicalPath); + return Response.status(400).entity("The zip file includes an illegal file path").build(); } - fileOutput.close(); } } - catch(IOException e) - { - e.printStackTrace(); + catch(IOException e) { + logger.log(Level.SEVERE, "Reading the language property zip file failed", e); return Response.status(500).entity("Internal server error. More details available at the server logs.").build(); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 292aba0cee3..4b919c5ed82 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -1,9 +1,12 @@ package edu.harvard.iq.dataverse.api; +import com.amazonaws.services.s3.model.PartETag; + import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.DatasetLock.Reason; import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord; import edu.harvard.iq.dataverse.api.auth.AuthRequired; +import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.authorization.Permission; @@ -13,6 +16,7 @@ import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.batch.jobs.importer.ImportMode; +import edu.harvard.iq.dataverse.dataaccess.*; import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; import edu.harvard.iq.dataverse.datacapturemodule.ScriptRequestResponse; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; @@ -23,91 +27,56 @@ import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.impl.AbstractSubmitToArchiveCommand; -import edu.harvard.iq.dataverse.engine.command.impl.AddLockCommand; -import edu.harvard.iq.dataverse.engine.command.impl.AssignRoleCommand; -import
edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreatePrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CuratePublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeaccessionDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetLinkingDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeletePrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.FinalizeDatasetPublicationCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetSpecificPublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDraftDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetLatestAccessibleDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetLatestPublishedDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetPrivateUrlCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ImportFromFileSystemCommand; -import edu.harvard.iq.dataverse.engine.command.impl.LinkDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListRoleAssignments; -import edu.harvard.iq.dataverse.engine.command.impl.ListVersionsCommand; -import edu.harvard.iq.dataverse.engine.command.impl.MoveDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetResult; -import edu.harvard.iq.dataverse.engine.command.impl.RemoveLockCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RequestRsyncScriptCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ReturnDatasetToAuthorCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SetDatasetCitationDateCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SetCurationStatusCommand; -import edu.harvard.iq.dataverse.engine.command.impl.SubmitDatasetForReviewCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetTargetURLCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetThumbnailCommand; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; +import edu.harvard.iq.dataverse.engine.command.impl.*; import edu.harvard.iq.dataverse.export.DDIExportServiceBean; import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.externaltools.ExternalTool; import edu.harvard.iq.dataverse.externaltools.ExternalToolHandler; +import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.globus.GlobusUtil; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.privateurl.PrivateUrl; -import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; -import edu.harvard.iq.dataverse.dataaccess.DataAccess; -import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; -import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; -import 
edu.harvard.iq.dataverse.dataaccess.StorageIO; -import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.exception.UnforcedCommandException; -import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetStorageSizeCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDvObjectPIDMetadataCommand; -import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitations; -import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitationsServiceBean; -import edu.harvard.iq.dataverse.makedatacount.DatasetMetrics; -import edu.harvard.iq.dataverse.makedatacount.DatasetMetricsServiceBean; -import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean; +import edu.harvard.iq.dataverse.makedatacount.*; import edu.harvard.iq.dataverse.makedatacount.MakeDataCountLoggingServiceBean.MakeDataCountEntry; import edu.harvard.iq.dataverse.metrics.MetricsUtil; -import edu.harvard.iq.dataverse.makedatacount.MakeDataCountUtil; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.FeatureFlags; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.ArchiverUtil; -import edu.harvard.iq.dataverse.util.BundleUtil; -import edu.harvard.iq.dataverse.util.EjbUtil; -import edu.harvard.iq.dataverse.util.FileUtil; -import edu.harvard.iq.dataverse.util.MarkupChecker; -import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; +import edu.harvard.iq.dataverse.util.*; import edu.harvard.iq.dataverse.util.bagit.OREMap; -import edu.harvard.iq.dataverse.util.json.JSONLDUtil; -import edu.harvard.iq.dataverse.util.json.JsonLDTerm; -import edu.harvard.iq.dataverse.util.json.JsonParseException; -import edu.harvard.iq.dataverse.util.json.JsonUtil; -import edu.harvard.iq.dataverse.util.SignpostingResources; -import edu.harvard.iq.dataverse.search.IndexServiceBean; - -import static edu.harvard.iq.dataverse.api.ApiConstants.*; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; +import edu.harvard.iq.dataverse.util.json.*; import edu.harvard.iq.dataverse.workflow.Workflow; import edu.harvard.iq.dataverse.workflow.WorkflowContext; -import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; import edu.harvard.iq.dataverse.workflow.WorkflowContext.TriggerType; - -import edu.harvard.iq.dataverse.globus.GlobusServiceBean; +import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; +import jakarta.ejb.EJB; +import jakarta.ejb.EJBException; +import jakarta.inject.Inject; +import jakarta.json.*; +import jakarta.json.stream.JsonParsingException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import jakarta.ws.rs.*; +import jakarta.ws.rs.container.ContainerRequestContext; +import jakarta.ws.rs.core.*; +import jakarta.ws.rs.core.Response.Status; +import org.apache.commons.lang3.StringUtils; +import 
org.eclipse.microprofile.openapi.annotations.Operation; +import org.eclipse.microprofile.openapi.annotations.media.Content; +import org.eclipse.microprofile.openapi.annotations.media.Schema; +import org.eclipse.microprofile.openapi.annotations.parameters.RequestBody; +import org.eclipse.microprofile.openapi.annotations.responses.APIResponse; +import org.eclipse.microprofile.openapi.annotations.tags.Tag; +import org.glassfish.jersey.media.multipart.FormDataBodyPart; +import org.glassfish.jersey.media.multipart.FormDataContentDisposition; +import org.glassfish.jersey.media.multipart.FormDataParam; import java.io.IOException; import java.io.InputStream; @@ -117,48 +86,23 @@ import java.text.SimpleDateFormat; import java.time.LocalDate; import java.time.LocalDateTime; -import java.util.*; -import java.util.concurrent.*; -import java.util.function.Predicate; import java.time.ZoneId; import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.*; import java.util.Map.Entry; +import java.util.concurrent.ExecutionException; +import java.util.function.Predicate; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Pattern; import java.util.stream.Collectors; -import jakarta.ejb.EJB; -import jakarta.ejb.EJBException; -import jakarta.inject.Inject; -import jakarta.json.*; -import jakarta.json.stream.JsonParsingException; -import jakarta.servlet.http.HttpServletRequest; -import jakarta.servlet.http.HttpServletResponse; -import jakarta.ws.rs.BadRequestException; -import jakarta.ws.rs.Consumes; -import jakarta.ws.rs.DELETE; -import jakarta.ws.rs.DefaultValue; -import jakarta.ws.rs.GET; -import jakarta.ws.rs.NotAcceptableException; -import jakarta.ws.rs.POST; -import jakarta.ws.rs.PUT; -import jakarta.ws.rs.Path; -import jakarta.ws.rs.PathParam; -import jakarta.ws.rs.Produces; -import jakarta.ws.rs.QueryParam; -import jakarta.ws.rs.container.ContainerRequestContext; -import jakarta.ws.rs.core.*; -import jakarta.ws.rs.core.Response.Status; +import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; -import org.apache.commons.lang3.StringUtils; -import org.glassfish.jersey.media.multipart.FormDataBodyPart; -import org.glassfish.jersey.media.multipart.FormDataContentDisposition; -import org.glassfish.jersey.media.multipart.FormDataParam; -import com.amazonaws.services.s3.model.PartETag; -import edu.harvard.iq.dataverse.settings.JvmSettings; - @Path("datasets") public class Datasets extends AbstractApiBean { @@ -219,6 +163,9 @@ public class Datasets extends AbstractApiBean { @EJB EmbargoServiceBean embargoService; + @EJB + RetentionServiceBean retentionService; + @Inject MakeDataCountLoggingServiceBean mdcLogService; @@ -254,11 +201,11 @@ public interface DsVersionHandler { @GET @AuthRequired @Path("{id}") - public Response getDataset(@Context ContainerRequestContext crc, @PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { + public Response getDataset(@Context ContainerRequestContext crc, @PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response, @QueryParam("returnOwners") boolean returnOwners) { return response( req -> { - final Dataset retrieved = execCommand(new 
GetDatasetCommand(req, findDatasetOrDie(id))); + final Dataset retrieved = execCommand(new GetDatasetCommand(req, findDatasetOrDie(id, true))); final DatasetVersion latest = execCommand(new GetLatestAccessibleDatasetVersionCommand(req, retrieved)); - final JsonObjectBuilder jsonbuilder = json(retrieved); + final JsonObjectBuilder jsonbuilder = json(retrieved, returnOwners); //Report MDC if this is a released version (could be draft if user has access, or user may not have access at all and is not getting metadata beyond the minimum) if((latest != null) && latest.isReleased()) { MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, retrieved); @@ -268,15 +215,13 @@ public Response getDataset(@Context ContainerRequestContext crc, @PathParam("id" }, getRequestUser(crc)); } - // TODO: // This API call should, ideally, call findUserOrDie() and the GetDatasetCommand // to obtain the dataset that we are trying to export - which would handle // Auth in the process... For now, Auth isn't necessary - since export ONLY // WORKS on published datasets, which are open to the world. -- L.A. 4.5 - @GET @Path("/export") - @Produces({"application/xml", "application/json", "application/html", "application/ld+json" }) + @Produces({"application/xml", "application/json", "application/html", "application/ld+json", "*/*" }) public Response exportDataset(@QueryParam("persistentId") String persistentId, @QueryParam("exporter") String exporter, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) { try { @@ -470,14 +415,15 @@ public Response useDefaultCitationDate(@Context ContainerRequestContext crc, @Pa @GET @AuthRequired @Path("{id}/versions") - public Response listVersions(@Context ContainerRequestContext crc, @PathParam("id") String id, @QueryParam("includeFiles") Boolean includeFiles, @QueryParam("limit") Integer limit, @QueryParam("offset") Integer offset) { + public Response listVersions(@Context ContainerRequestContext crc, @PathParam("id") String id, @QueryParam("excludeFiles") Boolean excludeFiles, @QueryParam("limit") Integer limit, @QueryParam("offset") Integer offset) { return response( req -> { Dataset dataset = findDatasetOrDie(id); + Boolean deepLookup = excludeFiles == null ? true : !excludeFiles; - return ok( execCommand( new ListVersionsCommand(req, dataset, offset, limit, (includeFiles == null ? true : includeFiles)) ) + return ok( execCommand( new ListVersionsCommand(req, dataset, offset, limit, deepLookup) ) .stream() - .map( d -> json(d, includeFiles == null ? 
true : includeFiles) ) + .map( d -> json(d, deepLookup) ) .collect(toJsonArray())); }, getRequestUser(crc)); } @@ -488,21 +434,39 @@ public Response listVersions(@Context ContainerRequestContext crc, @PathParam("i public Response getVersion(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, - @QueryParam("includeFiles") Boolean includeFiles, + @QueryParam("excludeFiles") Boolean excludeFiles, @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @QueryParam("returnOwners") boolean returnOwners, @Context UriInfo uriInfo, @Context HttpHeaders headers) { return response( req -> { - DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned); - - if (dsv == null || dsv.getId() == null) { + + //If excludeFiles is null the default is to provide the files and because of this we need to check permissions. + boolean checkPerms = excludeFiles == null ? true : !excludeFiles; + + Dataset dataset = findDatasetOrDie(datasetId); + DatasetVersion requestedDatasetVersion = getDatasetVersionOrDie(req, + versionId, + dataset, + uriInfo, + headers, + includeDeaccessioned, + checkPerms); + + if (requestedDatasetVersion == null || requestedDatasetVersion.getId() == null) { return notFound("Dataset version not found"); } - if (includeFiles == null ? true : includeFiles) { - dsv = datasetversionService.findDeep(dsv.getId()); + if (excludeFiles == null ? true : !excludeFiles) { + requestedDatasetVersion = datasetversionService.findDeep(requestedDatasetVersion.getId()); } - return ok(json(dsv, includeFiles == null ? true : includeFiles)); + + JsonObjectBuilder jsonBuilder = json(requestedDatasetVersion, + null, + excludeFiles == null ? true : !excludeFiles, + returnOwners); + return ok(jsonBuilder); + }, getRequestUser(crc)); } @@ -524,7 +488,7 @@ public Response getVersionFiles(@Context ContainerRequestContext crc, @Context UriInfo uriInfo, @Context HttpHeaders headers) { return response(req -> { - DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned); + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId, false), uriInfo, headers, includeDeaccessioned); DatasetVersionFilesServiceBean.FileOrderCriteria fileOrderCriteria; try { fileOrderCriteria = orderCriteria != null ? 
DatasetVersionFilesServiceBean.FileOrderCriteria.valueOf(orderCriteria) : DatasetVersionFilesServiceBean.FileOrderCriteria.NameAZ; @@ -543,7 +507,8 @@ public Response getVersionFiles(@Context ContainerRequestContext crc, } catch (IllegalArgumentException e) { return badRequest(BundleUtil.getStringFromBundle("datasets.api.version.files.invalid.access.status", List.of(accessStatus))); } - return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria))); + return ok(jsonFileMetadatas(datasetVersionFilesServiceBean.getFileMetadatas(datasetVersion, limit, offset, fileSearchCriteria, fileOrderCriteria)), + datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion, fileSearchCriteria)); }, getRequestUser(crc)); } @@ -729,8 +694,9 @@ public Response updateDatasetPIDMetadata(@Context ContainerRequestContext crc, @ } return response(req -> { - execCommand(new UpdateDvObjectPIDMetadataCommand(findDatasetOrDie(id), req)); - List<String> args = Arrays.asList(id); + Dataset dataset = findDatasetOrDie(id); + execCommand(new UpdateDvObjectPIDMetadataCommand(dataset, req)); + List<String> args = Arrays.asList(dataset.getIdentifier()); return ok(BundleUtil.getStringFromBundle("datasets.api.updatePIDMetadata.success.for.single.dataset", args)); }, getRequestUser(crc)); } @@ -742,7 +708,14 @@ public Response updateDatasetPIDMetadataAll(@Context ContainerRequestContext crc return response( req -> { datasetService.findAll().forEach( ds -> { try { + logger.fine("ReRegistering: " + ds.getId() + " : " + ds.getIdentifier()); + if (!ds.isReleased() || (!ds.isIdentifierRegistered() || (ds.getIdentifier() == null))) { + if (ds.isReleased()) { + logger.warning("Dataset id=" + ds.getId() + " is in an inconsistent state (publicationdate but no identifier/identifier not registered)"); + } + } else { execCommand(new UpdateDvObjectPIDMetadataCommand(findDatasetOrDie(ds.getId().toString()), req)); + } } catch (WrappedResponse ex) { Logger.getLogger(Datasets.class.getName()).log(Level.SEVERE, null, ex); } @@ -839,8 +812,8 @@ public Response getVersionJsonLDMetadata(@Context ContainerRequestContext crc, @ @AuthRequired @Path("{id}/metadata") @Produces("application/ld+json, application/json-ld") - public Response getVersionJsonLDMetadata(@Context ContainerRequestContext crc, @PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers) { - return getVersionJsonLDMetadata(crc, id, DS_VERSION_DRAFT, uriInfo, headers); + public Response getJsonLDMetadata(@Context ContainerRequestContext crc, @PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + return getVersionJsonLDMetadata(crc, id, DS_VERSION_LATEST, uriInfo, headers); } @PUT @@ -1737,6 +1710,306 @@ public Response removeFileEmbargo(@Context ContainerRequestContext crc, @PathPar } } + @POST + @AuthRequired + @Path("{id}/files/actions/:set-retention") + public Response createFileRetention(@Context ContainerRequestContext crc, @PathParam("id") String id, String jsonBody){ + + // user is authenticated + AuthenticatedUser authenticatedUser = null; + try { + authenticatedUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse ex) { + return error(Status.UNAUTHORIZED, "Authentication is required."); + } + + Dataset dataset; + try { + dataset = findDatasetOrDie(id); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + + boolean hasValidTerms =
TermsOfUseAndAccessValidator.isTOUAValid(dataset.getLatestVersion().getTermsOfUseAndAccess(), null); + + if (!hasValidTerms){ + return error(Status.CONFLICT, BundleUtil.getStringFromBundle("dataset.message.toua.invalid")); + } + + // client is superadmin or (client has EditDataset permission on these files and files are unreleased) + // check if files are unreleased(DRAFT?) + if ((!authenticatedUser.isSuperuser() && (dataset.getLatestVersion().getVersionState() != DatasetVersion.VersionState.DRAFT) ) || !permissionService.userOn(authenticatedUser, dataset).has(Permission.EditDataset)) { + return error(Status.FORBIDDEN, "Either the files are released and user is not a superuser or user does not have EditDataset permissions"); + } + + // check if retentions are allowed(:MinRetentionDurationInMonths), gets the :MinRetentionDurationInMonths setting variable, if 0 or not set(null) return 400 + long minRetentionDurationInMonths = 0; + try { + minRetentionDurationInMonths = Long.parseLong(settingsService.get(SettingsServiceBean.Key.MinRetentionDurationInMonths.toString())); + } catch (NumberFormatException nfe){ + if (nfe.getMessage().contains("null")) { + return error(Status.BAD_REQUEST, "No Retention periods allowed"); + } + } + if (minRetentionDurationInMonths == 0){ + return error(Status.BAD_REQUEST, "No Retention periods allowed"); + } + + JsonObject json; + try { + json = JsonUtil.getJsonObject(jsonBody); + } catch (JsonException ex) { + return error(Status.BAD_REQUEST, "Invalid JSON; error message: " + ex.getMessage()); + } + + Retention retention = new Retention(); + + + LocalDate currentDateTime = LocalDate.now(); + + // Extract the dateUnavailable - check if specified and valid + String dateUnavailableStr = ""; + LocalDate dateUnavailable; + try { + dateUnavailableStr = json.getString("dateUnavailable"); + dateUnavailable = LocalDate.parse(dateUnavailableStr); + } catch (NullPointerException npex) { + return error(Status.BAD_REQUEST, "Invalid retention period; no dateUnavailable specified"); + } catch (ClassCastException ccex) { + return error(Status.BAD_REQUEST, "Invalid retention period; dateUnavailable must be a string"); + } catch (DateTimeParseException dtpex) { + return error(Status.BAD_REQUEST, "Invalid date format for dateUnavailable: " + dateUnavailableStr); + } + + // check :MinRetentionDurationInMonths if -1 + LocalDate minRetentionDateTime = minRetentionDurationInMonths != -1 ? 
LocalDate.now().plusMonths(minRetentionDurationInMonths) : null; + // dateUnavailable is not in the past + if (dateUnavailable.isAfter(currentDateTime)){ + retention.setDateUnavailable(dateUnavailable); + } else { + return error(Status.BAD_REQUEST, "Date unavailable can not be in the past"); + } + + // dateUnavailable is within limits + if (minRetentionDateTime != null){ + if (dateUnavailable.isBefore(minRetentionDateTime)){ + return error(Status.BAD_REQUEST, "Date unavailable can not be earlier than MinRetentionDurationInMonths: "+minRetentionDurationInMonths + " from now"); + } + } + + try { + String reason = json.getString("reason"); + retention.setReason(reason); + } catch (NullPointerException npex) { + // ignoring; no reason specified is OK, it is optional + } catch (ClassCastException ccex) { + return error(Status.BAD_REQUEST, "Invalid retention period; reason must be a string"); + } + + + List<DataFile> datasetFiles = dataset.getFiles(); + List<DataFile> filesToRetention = new LinkedList<>(); + + // extract fileIds from json, find datafiles and add to list + if (json.containsKey("fileIds")){ + try { + JsonArray fileIds = json.getJsonArray("fileIds"); + for (JsonValue jsv : fileIds) { + try { + DataFile dataFile = findDataFileOrDie(jsv.toString()); + filesToRetention.add(dataFile); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + } catch (ClassCastException ccex) { + return error(Status.BAD_REQUEST, "Invalid retention period; fileIds must be an array of id strings"); + } catch (NullPointerException npex) { + return error(Status.BAD_REQUEST, "Invalid retention period; no fileIds specified"); + } + } else { + return error(Status.BAD_REQUEST, "No fileIds specified"); + } + + List<Retention> orphanedRetentions = new ArrayList<Retention>(); + // check if files belong to dataset + if (datasetFiles.containsAll(filesToRetention)) { + JsonArrayBuilder restrictedFiles = Json.createArrayBuilder(); + boolean badFiles = false; + for (DataFile datafile : filesToRetention) { + // superuser can overrule an existing retention, even on released files + if (datafile.isReleased() && !authenticatedUser.isSuperuser()) { + restrictedFiles.add(datafile.getId()); + badFiles = true; + } + } + if (badFiles) { + return Response.status(Status.FORBIDDEN) + .entity(NullSafeJsonBuilder.jsonObjectBuilder().add("status", ApiConstants.STATUS_ERROR) + .add("message", "You do not have permission to set a retention period for the following files") + .add("files", restrictedFiles).build()) + .type(MediaType.APPLICATION_JSON_TYPE).build(); + } + retention=retentionService.merge(retention); + // Good request, so add the retention. Track any existing retentions so we can + // delete them if there are no files left that reference them.
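+ // For reference, an illustrative request body that reaches this point (shape inferred from the parsing above; "reason" is optional):
+ // { "dateUnavailable": "2030-12-31", "reason": "Required by contract", "fileIds": [300, 301] }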
+ for (DataFile datafile : filesToRetention) { + Retention ret = datafile.getRetention(); + if (ret != null) { + ret.getDataFiles().remove(datafile); + if (ret.getDataFiles().isEmpty()) { + orphanedRetentions.add(ret); + } + } + // Save merges the datafile with a retention into the context + datafile.setRetention(retention); + fileService.save(datafile); + } + //Call service to get action logged + long retentionId = retentionService.save(retention, authenticatedUser.getIdentifier()); + if (orphanedRetentions.size() > 0) { + for (Retention ret : orphanedRetentions) { + retentionService.delete(ret, authenticatedUser.getIdentifier()); + } + } + //If superuser, report changes to any released files + if (authenticatedUser.isSuperuser()) { + String releasedFiles = filesToRetention.stream().filter(d -> d.isReleased()) + .map(d -> d.getId().toString()).collect(Collectors.joining(",")); + if (!releasedFiles.isBlank()) { + actionLogSvc + .log(new ActionLogRecord(ActionLogRecord.ActionType.Admin, "retentionAddedTo") + .setInfo("Retention id: " + retention.getId() + " added for released file(s), id(s) " + + releasedFiles + ".") + .setUserIdentifier(authenticatedUser.getIdentifier())); + } + } + return ok(Json.createObjectBuilder().add("message", "File(s) retention period has been set or updated")); + } else { + return error(BAD_REQUEST, "Not all files belong to dataset"); + } + } + + @POST + @AuthRequired + @Path("{id}/files/actions/:unset-retention") + public Response removeFileRetention(@Context ContainerRequestContext crc, @PathParam("id") String id, String jsonBody){ + + // user is authenticated + AuthenticatedUser authenticatedUser = null; + try { + authenticatedUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse ex) { + return error(Status.UNAUTHORIZED, "Authentication is required."); + } + + Dataset dataset; + try { + dataset = findDatasetOrDie(id); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + + // client is superadmin or (client has EditDataset permission on these files and files are unreleased) + // check if files are unreleased(DRAFT?)
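+ // Illustrative request body for this :unset-retention endpoint (only "fileIds" is read by the parsing below): { "fileIds": [300, 301] }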
+ //ToDo - here and below - check the release status of files and not the dataset state (draft dataset version still can have released files) + if ((!authenticatedUser.isSuperuser() && (dataset.getLatestVersion().getVersionState() != DatasetVersion.VersionState.DRAFT) ) || !permissionService.userOn(authenticatedUser, dataset).has(Permission.EditDataset)) { + return error(Status.FORBIDDEN, "Either the files are released and user is not a superuser or user does not have EditDataset permissions"); + } + + // check if retentions are allowed(:MinRetentionDurationInMonths), gets the :MinRetentionDurationInMonths setting variable, if 0 or not set(null) return 400 + int minRetentionDurationInMonths = 0; + try { + minRetentionDurationInMonths = Integer.parseInt(settingsService.get(SettingsServiceBean.Key.MinRetentionDurationInMonths.toString())); + } catch (NumberFormatException nfe){ + if (nfe.getMessage().contains("null")) { + return error(Status.BAD_REQUEST, "No Retention periods allowed"); + } + } + if (minRetentionDurationInMonths == 0){ + return error(Status.BAD_REQUEST, "No Retention periods allowed"); + } + + JsonObject json; + try { + json = JsonUtil.getJsonObject(jsonBody); + } catch (JsonException ex) { + return error(Status.BAD_REQUEST, "Invalid JSON; error message: " + ex.getMessage()); + } + + List<DataFile> datasetFiles = dataset.getFiles(); + List<DataFile> retentionFilesToUnset = new LinkedList<>(); + + // extract fileIds from json, find datafiles and add to list + if (json.containsKey("fileIds")){ + try { + JsonArray fileIds = json.getJsonArray("fileIds"); + for (JsonValue jsv : fileIds) { + try { + DataFile dataFile = findDataFileOrDie(jsv.toString()); + retentionFilesToUnset.add(dataFile); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + } catch (ClassCastException ccex) { + return error(Status.BAD_REQUEST, "fileIds must be an array of id strings"); + } catch (NullPointerException npex) { + return error(Status.BAD_REQUEST, "No fileIds specified"); + } + } else { + return error(Status.BAD_REQUEST, "No fileIds specified"); + } + + List<Retention> orphanedRetentions = new ArrayList<Retention>(); + // check if files belong to dataset + if (datasetFiles.containsAll(retentionFilesToUnset)) { + JsonArrayBuilder restrictedFiles = Json.createArrayBuilder(); + boolean badFiles = false; + for (DataFile datafile : retentionFilesToUnset) { + // superuser can overrule an existing retention, even on released files + if (datafile.getRetention()==null || ((datafile.isReleased() && datafile.getRetention() != null) && !authenticatedUser.isSuperuser())) { + restrictedFiles.add(datafile.getId()); + badFiles = true; + } + } + if (badFiles) { + return Response.status(Status.FORBIDDEN) + .entity(NullSafeJsonBuilder.jsonObjectBuilder().add("status", ApiConstants.STATUS_ERROR) + .add("message", "The following files do not have retention periods or you do not have permission to remove their retention periods") + .add("files", restrictedFiles).build()) + .type(MediaType.APPLICATION_JSON_TYPE).build(); + } + // Good request, so remove the retention from the files. Track any existing retentions so we can + // delete them if there are no files left that reference them.
+            for (DataFile datafile : retentionFilesToUnset) {
+                Retention ret = datafile.getRetention();
+                if (ret != null) {
+                    ret.getDataFiles().remove(datafile);
+                    if (ret.getDataFiles().isEmpty()) {
+                        orphanedRetentions.add(ret);
+                    }
+                }
+                // Save merges the datafile with a retention into the context
+                datafile.setRetention(null);
+                fileService.save(datafile);
+            }
+            if (orphanedRetentions.size() > 0) {
+                for (Retention ret : orphanedRetentions) {
+                    retentionService.delete(ret, authenticatedUser.getIdentifier());
+                }
+            }
+            String releasedFiles = retentionFilesToUnset.stream().filter(d -> d.isReleased()).map(d -> d.getId().toString()).collect(Collectors.joining(","));
+            if (!releasedFiles.isBlank()) {
+                ActionLogRecord removeRecord = new ActionLogRecord(ActionLogRecord.ActionType.Admin, "retentionRemovedFrom").setInfo("Retention removed from released file(s), id(s) " + releasedFiles + ".");
+                removeRecord.setUserIdentifier(authenticatedUser.getIdentifier());
+                actionLogSvc.log(removeRecord);
+            }
+            return ok(Json.createObjectBuilder().add("message", "Retention periods were removed from file(s)"));
+        } else {
+            return error(BAD_REQUEST, "Not all files belong to the dataset");
+        }
+    }
 
     @PUT
     @AuthRequired
@@ -1971,10 +2244,26 @@ public Response getDatasetThumbnail(@PathParam("id") String idSupplied) {
         }
     }
 
-    // TODO: Rather than only supporting looking up files by their database IDs (dataFileIdSupplied), consider supporting persistent identifiers.
-    @POST
-    @AuthRequired
-    @Path("{id}/thumbnail/{dataFileId}")
+    @GET
+    @Produces({ "image/png" })
+    @Path("{id}/logo")
+    public Response getDatasetLogo(@PathParam("id") String idSupplied) {
+        try {
+            Dataset dataset = findDatasetOrDie(idSupplied);
+            InputStream is = DatasetUtil.getLogoAsInputStream(dataset);
+            if (is == null) {
+                return notFound("Logo not available");
+            }
+            return Response.ok(is).build();
+        } catch (WrappedResponse wr) {
+            return notFound("Logo not available");
+        }
+    }
+
+    // TODO: Rather than only supporting looking up files by their database IDs (dataFileIdSupplied), consider supporting persistent identifiers.
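+    // Illustrative call for the endpoint below (a sketch; id values, token, and host are placeholders):
+    //   curl -X POST -H "X-Dataverse-key: $API_TOKEN" "$SERVER_URL/api/datasets/24/thumbnail/42"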
+ @POST + @AuthRequired + @Path("{id}/thumbnail/{dataFileId}") public Response setDataFileAsThumbnail(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied, @PathParam("dataFileId") long dataFileIdSupplied) { try { DatasetThumbnail datasetThumbnail = execCommand(new UpdateDatasetThumbnailCommand(createDataverseRequest(getRequestUser(crc)), findDatasetOrDie(idSupplied), UpdateDatasetThumbnailCommand.UserIntent.setDatasetFileAsThumbnail, dataFileIdSupplied, null)); @@ -1988,6 +2277,14 @@ public Response setDataFileAsThumbnail(@Context ContainerRequestContext crc, @Pa @AuthRequired @Path("{id}/thumbnail") @Consumes(MediaType.MULTIPART_FORM_DATA) + @Produces("application/json") + @Operation(summary = "Uploads a logo for a dataset", + description = "Uploads a logo for a dataset") + @APIResponse(responseCode = "200", + description = "Dataset logo uploaded successfully") + @Tag(name = "uploadDatasetLogo", + description = "Uploads a logo for a dataset") + @RequestBody(content = @Content(mediaType = MediaType.MULTIPART_FORM_DATA)) public Response uploadDatasetLogo(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied, @FormDataParam("file") InputStream inputStream) { try { DatasetThumbnail datasetThumbnail = execCommand(new UpdateDatasetThumbnailCommand(createDataverseRequest(getRequestUser(crc)), findDatasetOrDie(idSupplied), UpdateDatasetThumbnailCommand.UserIntent.setNonDatasetFileAsThumbnail, null, inputStream)); @@ -2009,6 +2306,7 @@ public Response removeDatasetLogo(@Context ContainerRequestContext crc, @PathPar } } + @Deprecated(forRemoval = true, since = "2024-07-07") @GET @AuthRequired @Path("{identifier}/dataCaptureModule/rsync") @@ -2189,9 +2487,9 @@ public Response returnToAuthor(@Context ContainerRequestContext crc, @PathParam( Dataset dataset = findDatasetOrDie(idSupplied); String reasonForReturn = null; reasonForReturn = json.getString("reasonForReturn"); - // TODO: Once we add a box for the curator to type into, pass the reason for return to the ReturnDatasetToAuthorCommand and delete this check and call to setReturnReason on the API side. 
-        if (reasonForReturn == null || reasonForReturn.isEmpty()) {
-            return error(Response.Status.BAD_REQUEST, "You must enter a reason for returning a dataset to the author(s).");
+        if ((reasonForReturn == null || reasonForReturn.isEmpty())
+                && !FeatureFlags.DISABLE_RETURN_TO_AUTHOR_REASON.enabled()) {
+            return error(Response.Status.BAD_REQUEST, BundleUtil.getStringFromBundle("dataset.reject.datasetNotInReview"));
         }
         AuthenticatedUser authenticatedUser = getRequestAuthenticatedUserOrDie(crc);
         Dataset updatedDataset = execCommand(new ReturnDatasetToAuthorCommand(createDataverseRequest(authenticatedUser), dataset, reasonForReturn ));
@@ -2265,42 +2563,6 @@ public Response deleteCurationStatus(@Context ContainerRequestContext crc, @Path
         }
     }
 
-    @GET
-    @AuthRequired
-    @Path("{id}/uploadsid")
-    @Deprecated
-    public Response getUploadUrl(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied) {
-        try {
-            Dataset dataset = findDatasetOrDie(idSupplied);
-
-            boolean canUpdateDataset = false;
-            canUpdateDataset = permissionSvc.requestOn(createDataverseRequest(getRequestUser(crc)), dataset).canIssue(UpdateDatasetVersionCommand.class);
-            if (!canUpdateDataset) {
-                return error(Response.Status.FORBIDDEN, "You are not permitted to upload files to this dataset.");
-            }
-            S3AccessIO s3io = FileUtil.getS3AccessForDirectUpload(dataset);
-            if (s3io == null) {
-                return error(Response.Status.NOT_FOUND, "Direct upload not supported for files in this dataset: " + dataset.getId());
-            }
-            String url = null;
-            String storageIdentifier = null;
-            try {
-                url = s3io.generateTemporaryS3UploadUrl();
-                storageIdentifier = FileUtil.getStorageIdentifierFromLocation(s3io.getStorageLocation());
-            } catch (IOException io) {
-                logger.warning(io.getMessage());
-                throw new WrappedResponse(io, error(Response.Status.INTERNAL_SERVER_ERROR, "Could not create process direct upload request"));
-            }
-
-            JsonObjectBuilder response = Json.createObjectBuilder()
-                    .add("url", url)
-                    .add("storageIdentifier", storageIdentifier);
-            return ok(response);
-        } catch (WrappedResponse wr) {
-            return wr.getResponse();
-        }
-    }
-
     @GET
     @AuthRequired
     @Path("{id}/uploadurls")
@@ -2319,6 +2581,22 @@ public Response getMPUploadUrls(@Context ContainerRequestContext crc, @PathParam
             return error(Response.Status.NOT_FOUND, "Direct upload not supported for files in this dataset: " + dataset.getId());
         }
 
+        Long maxSize = systemConfig.getMaxFileUploadSizeForStore(dataset.getEffectiveStorageDriverId());
+        if (maxSize != null) {
+            if (fileSize > maxSize) {
+                return error(Response.Status.BAD_REQUEST,
+                        "The file you are trying to upload is too large to be uploaded to this dataset. "
+                                + "The maximum allowed file size is " + maxSize + " bytes.");
+            }
+        }
+        UploadSessionQuotaLimit limit = fileService.getUploadSessionQuotaLimit(dataset);
+        if (limit != null) {
+            if (fileSize > limit.getRemainingQuotaInBytes()) {
+                return error(Response.Status.BAD_REQUEST,
+                        "The file you are trying to upload is too large to be uploaded to this dataset. "
+                                + "The remaining file size quota is " + limit.getRemainingQuotaInBytes() + " bytes.");
+            }
+        }
         JsonObjectBuilder response = null;
         String storageIdentifier = null;
         try {
@@ -2481,6 +2759,14 @@ public Response completeMPUpload(@Context ContainerRequestContext crc, String pa
     @AuthRequired
     @Path("{id}/add")
     @Consumes(MediaType.MULTIPART_FORM_DATA)
+    @Produces("application/json")
+    @Operation(summary = "Uploads a file for a dataset",
+            description = "Uploads a file for a dataset")
+    @APIResponse(responseCode = "200",
+            description = "File uploaded successfully to dataset")
+    @Tag(name = "addFileToDataset",
+            description = "Uploads a file for a dataset")
+    @RequestBody(content = @Content(mediaType = MediaType.MULTIPART_FORM_DATA))
     public Response addFileToDataset(@Context ContainerRequestContext crc,
                                      @PathParam("id") String idSupplied,
                                      @FormDataParam("jsonData") String jsonData,
@@ -2753,35 +3039,53 @@ public static <T> T handleVersion(String versionId, DsVersionHandler<T> hdl)
         }
     }
 
-    private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers) throws WrappedResponse {
-        return getDatasetVersionOrDie(req, versionNumber, ds, uriInfo, headers, false);
+    /*
+     * includeDeaccessioned defaults to false and checkPermsWhenDeaccessioned to false. Use it only when you are sure that you don't need to work with
+     * a deaccessioned dataset.
+     */
+    private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req,
+                                                  String versionNumber,
+                                                  final Dataset ds,
+                                                  UriInfo uriInfo,
+                                                  HttpHeaders headers) throws WrappedResponse {
+        // The checkPerms was added to check the permissions ONLY when the dataset is deaccessioned.
+        boolean checkFilePerms = false;
+        boolean includeDeaccessioned = false;
+        return getDatasetVersionOrDie(req, versionNumber, ds, uriInfo, headers, includeDeaccessioned, checkFilePerms);
+    }
+
+    /*
+     * checkPermsWhenDeaccessioned defaults to true. Be aware that the version will only be obtainable if the user has edit permissions.
+     */
+    private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds,
+            UriInfo uriInfo, HttpHeaders headers, boolean includeDeaccessioned) throws WrappedResponse {
+        boolean checkPermsWhenDeaccessioned = true;
+        boolean bypassAccessCheck = false;
+        return getDatasetVersionOrDie(req, versionNumber, ds, uriInfo, headers, includeDeaccessioned, checkPermsWhenDeaccessioned, bypassAccessCheck);
     }
 
-    private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds, UriInfo uriInfo, HttpHeaders headers, boolean includeDeaccessioned) throws WrappedResponse {
-        DatasetVersion dsv = execCommand(handleVersion(versionNumber, new DsVersionHandler<Command<DatasetVersion>>() {
+    /*
+     * checkPermsWhenDeaccessioned defaults to true. Be aware that the version will only be obtainable if the user has edit permissions.
+     */
+    private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds,
+            UriInfo uriInfo, HttpHeaders headers, boolean includeDeaccessioned, boolean checkPermsWhenDeaccessioned) throws WrappedResponse {
+        boolean bypassAccessCheck = false;
+        return getDatasetVersionOrDie(req, versionNumber, ds, uriInfo, headers, includeDeaccessioned, checkPermsWhenDeaccessioned, bypassAccessCheck);
+    }
 
-    @Override
-    public Command<DatasetVersion> handleLatest() {
-        return new GetLatestAccessibleDatasetVersionCommand(req, ds, includeDeaccessioned);
-    }
+    /*
+     * Allows defining when the permissions should be checked when a deaccessioned dataset is requested. If the user doesn't have edit permissions, this will result in an error.
+     */
+    private DatasetVersion getDatasetVersionOrDie(final DataverseRequest req, String versionNumber, final Dataset ds,
+            UriInfo uriInfo, HttpHeaders headers, boolean includeDeaccessioned, boolean checkPermsWhenDeaccessioned,
+            boolean bypassAccessCheck)
+            throws WrappedResponse {
 
-    @Override
-    public Command<DatasetVersion> handleDraft() {
-        return new GetDraftDatasetVersionCommand(req, ds);
-    }
+        DatasetVersion dsv = findDatasetVersionOrDie(req, versionNumber, ds, includeDeaccessioned, checkPermsWhenDeaccessioned);
 
-    @Override
-    public Command<DatasetVersion> handleSpecific(long major, long minor) {
-        return new GetSpecificPublishedDatasetVersionCommand(req, ds, major, minor, includeDeaccessioned);
-    }
-
-    @Override
-    public Command<DatasetVersion> handleLatestPublished() {
-        return new GetLatestPublishedDatasetVersionCommand(req, ds, includeDeaccessioned);
-    }
-    }));
         if (dsv == null || dsv.getId() == null) {
-            throw new WrappedResponse(notFound("Dataset version " + versionNumber + " of dataset " + ds.getId() + " not found"));
+            throw new WrappedResponse(
+                    notFound("Dataset version " + versionNumber + " of dataset " + ds.getId() + " not found"));
         }
         if (dsv.isReleased()&& uriInfo!=null) {
             MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, ds);
@@ -2789,7 +3093,7 @@ public Command<DatasetVersion> handleLatestPublished() {
         }
         return dsv;
     }
-
+    
     @GET
     @Path("{identifier}/locks")
     public Response getLocksForDataset(@PathParam("identifier") String id, @QueryParam("type") DatasetLock.Reason lockType) {
@@ -3442,15 +3746,264 @@ public Response getTimestamps(@Context ContainerRequestContext crc, @PathParam("
     }
 
+/****************************
+ * Globus Support Section:
+ *
+ * Globus transfer in (upload) and out (download) involve three basic steps: The
+ * app is launched and makes a callback to the
+ * globusUploadParameters/globusDownloadParameters method to get all of the info
+ * needed to set up its display.
+ *
+ * At some point after that, the user will make a selection as to which files to
+ * transfer and the app will call requestGlobusUploadPaths/requestGlobusDownload
+ * to indicate a transfer is about to start. In addition to providing the
+ * details of where to transfer the files to/from, Dataverse also grants the
+ * Globus principal involved the relevant rw or r permission for the dataset.
+ *
+ * Once the transfer is started, the app records the task id and sends it to
+ * Dataverse in the addGlobusFiles/monitorGlobusDownload call. Dataverse then
+ * monitors the transfer task and when it ultimately succeeds or fails it
+ * revokes the principal's permission and, in the transfer-in case, adds the
+ * files to the dataset. (The dataset is locked until the transfer completes.)
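+ *
+ * An illustrative transfer-in (upload) sequence for a managed store, with all
+ * values as placeholders (the endpoints appear in the code below):
+ *   1. app: GET  /api/datasets/{id}/globusUploadParameters
+ *   2. app: POST /api/datasets/{id}/requestGlobusUploadPaths  {"principal":"...","numberOfFiles":N}
+ *   3. app starts the Globus transfer, then calls
+ *      POST /api/datasets/{id}/addGlobusFiles with jsonData including the "taskIdentifier"
+ *   4. Dataverse monitors the task, revokes the permission and, on success, adds the files.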
+ * + * (If no transfer is started within a specified timeout, permissions will + * automatically be revoked - see the GlobusServiceBean for details.) + * + * The option to reference a file at a remote endpoint (rather than transfer it) + * follows the first two steps of the process above but completes with a call to + * the normal /addFiles endpoint (as there is no transfer to monitor and the + * files can be added to the dataset immediately.) + */ + + /** + * Retrieve the parameters and signed URLs required to perform a globus + * transfer. This api endpoint is expected to be called as a signed callback + * after the globus-dataverse app/other app is launched, but it will accept + * other forms of authentication. + * + * @param crc + * @param datasetId + */ + @GET + @AuthRequired + @Path("{id}/globusUploadParameters") + @Produces(MediaType.APPLICATION_JSON) + public Response getGlobusUploadParams(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, + @QueryParam(value = "locale") String locale) { + // ------------------------------------- + // (1) Get the user from the ContainerRequestContext + // ------------------------------------- + AuthenticatedUser authUser; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse e) { + return e.getResponse(); + } + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + String storeId = dataset.getEffectiveStorageDriverId(); + // acceptsGlobusTransfers should only be true for an S3 or globus store + if (!GlobusAccessibleStore.acceptsGlobusTransfers(storeId) + && !GlobusAccessibleStore.allowsGlobusReferences(storeId)) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled")); + } + + URLTokenUtil tokenUtil = new URLTokenUtil(dataset, authSvc.findApiTokenByUser(authUser), locale); + + boolean managed = GlobusAccessibleStore.isDataverseManaged(storeId); + String transferEndpoint = null; + JsonArray referenceEndpointsWithPaths = null; + if (managed) { + transferEndpoint = GlobusAccessibleStore.getTransferEndpointId(storeId); + } else { + referenceEndpointsWithPaths = GlobusAccessibleStore.getReferenceEndpointsWithPaths(storeId); + } + + JsonObjectBuilder queryParams = Json.createObjectBuilder(); + queryParams.add("queryParameters", + Json.createArrayBuilder().add(Json.createObjectBuilder().add("datasetId", "{datasetId}")) + .add(Json.createObjectBuilder().add("siteUrl", "{siteUrl}")) + .add(Json.createObjectBuilder().add("datasetVersion", "{datasetVersion}")) + .add(Json.createObjectBuilder().add("dvLocale", "{localeCode}")) + .add(Json.createObjectBuilder().add("datasetPid", "{datasetPid}"))); + JsonObject substitutedParams = tokenUtil.getParams(queryParams.build()); + JsonObjectBuilder params = Json.createObjectBuilder(); + substitutedParams.keySet().forEach((key) -> { + params.add(key, substitutedParams.get(key)); + }); + params.add("managed", Boolean.toString(managed)); + if (managed) { + Long maxSize = systemConfig.getMaxFileUploadSizeForStore(storeId); + if (maxSize != null) { + params.add("fileSizeLimit", maxSize); + } + UploadSessionQuotaLimit limit = fileService.getUploadSessionQuotaLimit(dataset); + if (limit != null) { + params.add("remainingQuota", limit.getRemainingQuotaInBytes()); + } + } + if (transferEndpoint != null) { + params.add("endpoint", transferEndpoint); + 
} else {
+            params.add("referenceEndpointsWithPaths", referenceEndpointsWithPaths);
+        }
+        int timeoutSeconds = JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class);
+        JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder();
+        String requestCallName = managed ? "requestGlobusTransferPaths" : "requestGlobusReferencePaths";
+        allowedApiCalls.add(
+                Json.createObjectBuilder().add(URLTokenUtil.NAME, requestCallName).add(URLTokenUtil.HTTP_METHOD, "POST")
+                        .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/requestGlobusUploadPaths")
+                        .add(URLTokenUtil.TIMEOUT, timeoutSeconds));
+        if(managed) {
+            allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "addGlobusFiles")
+                    .add(URLTokenUtil.HTTP_METHOD, "POST")
+                    .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/addGlobusFiles")
+                    .add(URLTokenUtil.TIMEOUT, timeoutSeconds));
+        } else {
+            allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "addFiles")
+                    .add(URLTokenUtil.HTTP_METHOD, "POST")
+                    .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/addFiles")
+                    .add(URLTokenUtil.TIMEOUT, timeoutSeconds));
+        }
+        allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "getDatasetMetadata")
+                .add(URLTokenUtil.HTTP_METHOD, "GET")
+                .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}")
+                .add(URLTokenUtil.TIMEOUT, 5));
+        allowedApiCalls.add(
+                Json.createObjectBuilder().add(URLTokenUtil.NAME, "getFileListing").add(URLTokenUtil.HTTP_METHOD, "GET")
+                        .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}/files")
+                        .add(URLTokenUtil.TIMEOUT, 5));
+
+        return ok(tokenUtil.createPostBody(params.build(), allowedApiCalls.build()));
+    }
+
+    /**
+     * Provides specific storageIdentifiers to use for each file and requests permissions for a given globus user to upload to the dataset
+     *
+     * @param crc
+     * @param datasetId
+     * @param jsonData - an object that must include the id of the globus "principal" involved and the "numberOfFiles" that will be transferred.
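+     *                 A minimal sketch of the expected body (the values here are placeholders):
+     *                 { "principal": "globus-user-principal", "numberOfFiles": 2 }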
+     * @return
+     * @throws IOException
+     * @throws ExecutionException
+     * @throws InterruptedException
+     */
+    @POST
+    @AuthRequired
+    @Path("{id}/requestGlobusUploadPaths")
+    @Consumes(MediaType.APPLICATION_JSON)
+    @Produces(MediaType.APPLICATION_JSON)
+    public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId,
+            String jsonBody) throws IOException, ExecutionException, InterruptedException {
+
+        logger.info(" ==== (api requestGlobusUpload) jsonBody ====== " + jsonBody);
+
+        if (!systemConfig.isGlobusUpload()) {
+            return error(Response.Status.SERVICE_UNAVAILABLE,
+                    BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled"));
+        }
+
+        // -------------------------------------
+        // (1) Get the user from the ContainerRequestContext
+        // -------------------------------------
+        AuthenticatedUser authUser;
+        try {
+            authUser = getRequestAuthenticatedUserOrDie(crc);
+        } catch (WrappedResponse e) {
+            return e.getResponse();
+        }
+
+        // -------------------------------------
+        // (2) Get the Dataset Id
+        // -------------------------------------
+        Dataset dataset;
+
+        try {
+            dataset = findDatasetOrDie(datasetId);
+        } catch (WrappedResponse wr) {
+            return wr.getResponse();
+        }
+        if (permissionSvc.requestOn(createDataverseRequest(authUser), dataset)
+                .canIssue(UpdateDatasetVersionCommand.class)) {
+
+            JsonObject params = JsonUtil.getJsonObject(jsonBody);
+            if (!GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) {
+                try {
+                    JsonArray referencedFiles = params.getJsonArray("referencedFiles");
+                    if (referencedFiles == null || referencedFiles.size() == 0) {
+                        return badRequest("No referencedFiles specified");
+                    }
+                    JsonObject fileMap = globusService.requestReferenceFileIdentifiers(dataset, referencedFiles);
+                    return (ok(fileMap));
+                } catch (Exception e) {
+                    return badRequest(e.getLocalizedMessage());
+                }
+            } else {
+                try {
+                    String principal = params.getString("principal");
+                    int numberOfPaths = params.getInt("numberOfFiles");
+                    if (numberOfPaths <= 0) {
+                        return badRequest("numberOfFiles must be positive");
+                    }
+
+                    JsonObject response = globusService.requestAccessiblePaths(principal, dataset, numberOfPaths);
+                    switch (response.getInt("status")) {
+                    case 201:
+                        return ok(response.getJsonObject("paths"));
+                    case 400:
+                        return badRequest("Unable to grant permission");
+                    case 409:
+                        return conflict("Permission already exists");
+                    default:
+                        return error(null, "Unexpected error when granting permission");
+                    }
+
+                } catch (NullPointerException | ClassCastException e) {
+                    return badRequest("Error retrieving principal and numberOfFiles from JSON request body");
+
+                }
+            }
+        } else {
+            return forbidden("User doesn't have permission to upload to this dataset");
+        }
+
+    }
+
+    /** A method analogous to /addFiles that must also include the taskIdentifier of the transfer-in-progress to monitor
+     *
+     * @param crc
+     * @param datasetId
+     * @param jsonData - see /addFiles documentation; an additional "taskIdentifier" key in the main object is required.
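+     *                 A minimal sketch (values are placeholders; the file entries follow the /addFiles format):
+     *                 { "taskIdentifier": "globus-task-uuid", "files": [ ... ] }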
+ * @param uriInfo + * @return + * @throws IOException + * @throws ExecutionException + * @throws InterruptedException + */ @POST @AuthRequired - @Path("{id}/addglobusFiles") + @Path("{id}/addGlobusFiles") @Consumes(MediaType.MULTIPART_FORM_DATA) + @Produces("application/json") + @Operation(summary = "Uploads a Globus file for a dataset", + description = "Uploads a Globus file for a dataset") + @APIResponse(responseCode = "200", + description = "Globus file uploaded successfully to dataset") + @Tag(name = "addGlobusFilesToDataset", + description = "Uploads a Globus file for a dataset") + @RequestBody(content = @Content(mediaType = MediaType.MULTIPART_FORM_DATA)) public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @FormDataParam("jsonData") String jsonData, - @Context UriInfo uriInfo, - @Context HttpHeaders headers + @Context UriInfo uriInfo ) throws IOException, ExecutionException, InterruptedException { logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); @@ -3480,6 +4033,15 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } catch (WrappedResponse wr) { return wr.getResponse(); } + + JsonObject jsonObject = null; + try { + jsonObject = JsonUtil.getJsonObject(jsonData); + } catch (Exception ex) { + logger.fine("Error parsing json: " + jsonData + " " + ex.getMessage()); + return badRequest("Error parsing json body"); + + } //------------------------------------ // (2b) Make sure dataset does not have package file @@ -3510,32 +4072,279 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, } - String requestUrl = headers.getRequestHeader("origin").get(0); + String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); + + // Async Call + globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); + + return ok("Async call to Globus Upload started "); + + } + +/** + * Retrieve the parameters and signed URLs required to perform a globus + * transfer/download. This api endpoint is expected to be called as a signed + * callback after the globus-dataverse app/other app is launched, but it will + * accept other forms of authentication. + * + * @param crc + * @param datasetId + * @param locale + * @param downloadId - an id to a cached object listing the files involved. This is generated via Dataverse and provided to the dataverse-globus app in a signedURL. + * @return - JSON containing the parameters and URLs needed by the dataverse-globus app. The format is analogous to that for external tools. 
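+     *           (Roughly a post body wrapping the "queryParameters" assembled below plus the "allowedApiCalls"
+     *           the app may make; see URLTokenUtil.createPostBody. This shape is inferred from the code below,
+     *           not from a formal schema.)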
+ */ + @GET + @AuthRequired + @Path("{id}/globusDownloadParameters") + @Produces(MediaType.APPLICATION_JSON) + public Response getGlobusDownloadParams(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, + @QueryParam(value = "locale") String locale, @QueryParam(value = "downloadId") String downloadId) { + // ------------------------------------- + // (1) Get the user from the ContainerRequestContext + // ------------------------------------- + AuthenticatedUser authUser = null; + try { + authUser = getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse e) { + logger.fine("guest user globus download"); + } + // ------------------------------------- + // (2) Get the Dataset Id + // ------------------------------------- + Dataset dataset; + + try { + dataset = findDatasetOrDie(datasetId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + String storeId = dataset.getEffectiveStorageDriverId(); + // acceptsGlobusTransfers should only be true for an S3 or globus store + if (!(GlobusAccessibleStore.acceptsGlobusTransfers(storeId) + || GlobusAccessibleStore.allowsGlobusReferences(storeId))) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); + } - if(requestUrl.contains("localhost")){ - requestUrl = "http://localhost:8080"; + JsonObject files = globusService.getFilesForDownload(downloadId); + if (files == null) { + return notFound(BundleUtil.getStringFromBundle("datasets.api.globusdownloadnotfound")); } - // Async Call - globusService.globusUpload(jsonData, token, dataset, requestUrl, authUser); + URLTokenUtil tokenUtil = new URLTokenUtil(dataset, authSvc.findApiTokenByUser(authUser), locale); - return ok("Async call to Globus Upload started "); + boolean managed = GlobusAccessibleStore.isDataverseManaged(storeId); + String transferEndpoint = null; + JsonObjectBuilder queryParams = Json.createObjectBuilder(); + queryParams.add("queryParameters", + Json.createArrayBuilder().add(Json.createObjectBuilder().add("datasetId", "{datasetId}")) + .add(Json.createObjectBuilder().add("siteUrl", "{siteUrl}")) + .add(Json.createObjectBuilder().add("datasetVersion", "{datasetVersion}")) + .add(Json.createObjectBuilder().add("dvLocale", "{localeCode}")) + .add(Json.createObjectBuilder().add("datasetPid", "{datasetPid}"))); + JsonObject substitutedParams = tokenUtil.getParams(queryParams.build()); + JsonObjectBuilder params = Json.createObjectBuilder(); + substitutedParams.keySet().forEach((key) -> { + params.add(key, substitutedParams.get(key)); + }); + params.add("managed", Boolean.toString(managed)); + if (managed) { + transferEndpoint = GlobusAccessibleStore.getTransferEndpointId(storeId); + params.add("endpoint", transferEndpoint); + } + params.add("files", files); + int timeoutSeconds = JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class); + JsonArrayBuilder allowedApiCalls = Json.createArrayBuilder(); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "monitorGlobusDownload") + .add(URLTokenUtil.HTTP_METHOD, "POST") + .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/monitorGlobusDownload") + .add(URLTokenUtil.TIMEOUT, timeoutSeconds)); + allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "requestGlobusDownload") + .add(URLTokenUtil.HTTP_METHOD, "POST") + .add(URLTokenUtil.URL_TEMPLATE, + "/api/v1/datasets/{datasetId}/requestGlobusDownload?downloadId=" + downloadId) + .add(URLTokenUtil.TIMEOUT, timeoutSeconds)); + 
allowedApiCalls.add(Json.createObjectBuilder().add(URLTokenUtil.NAME, "getDatasetMetadata")
+                .add(URLTokenUtil.HTTP_METHOD, "GET")
+                .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}")
+                .add(URLTokenUtil.TIMEOUT, 5));
+        allowedApiCalls.add(
+                Json.createObjectBuilder().add(URLTokenUtil.NAME, "getFileListing").add(URLTokenUtil.HTTP_METHOD, "GET")
+                        .add(URLTokenUtil.URL_TEMPLATE, "/api/v1/datasets/{datasetId}/versions/{datasetVersion}/files")
+                        .add(URLTokenUtil.TIMEOUT, 5));
+
+        return ok(tokenUtil.createPostBody(params.build(), allowedApiCalls.build()));
+    }
+
+    /**
+     * Requests permissions for a given globus user to download the specified files
+     * in the dataset and returns information about the paths to transfer from.
+     *
+     * When called directly rather than in response to being given a downloadId, the jsonData can include a "fileIds" key with an array of file ids to transfer.
+     *
+     * @param crc
+     * @param datasetId
+     * @param jsonData - a JSON object that must include the id of the Globus "principal" that will be transferring the files in the case where Dataverse manages the Globus endpoint. For remote endpoints, the principal is not required.
+     * @return - a JSON object containing a map of file ids to Globus endpoint/path
+     * @throws IOException
+     * @throws ExecutionException
+     * @throws InterruptedException
+     */
     @POST
     @AuthRequired
-    @Path("{id}/deleteglobusRule")
-    @Consumes(MediaType.MULTIPART_FORM_DATA)
-    public Response deleteglobusRule(@Context ContainerRequestContext crc, @PathParam("id") String datasetId,@FormDataParam("jsonData") String jsonData
-    ) throws IOException, ExecutionException, InterruptedException {
+    @Path("{id}/requestGlobusDownload")
+    @Consumes(MediaType.APPLICATION_JSON)
+    @Produces(MediaType.APPLICATION_JSON)
+    public Response requestGlobusDownload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId,
+            @QueryParam(value = "downloadId") String downloadId, String jsonBody)
+            throws IOException, ExecutionException, InterruptedException {
+        logger.info(" ==== (api requestGlobusDownload) jsonBody ====== " + jsonBody);
 
-        logger.info(" ==== (api deleteglobusRule) jsonData ====== " + jsonData);
+        if (!systemConfig.isGlobusDownload()) {
+            return error(Response.Status.SERVICE_UNAVAILABLE,
+                    BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled"));
+        }
+        // -------------------------------------
+        // (1) Get the user from the ContainerRequestContext
+        // -------------------------------------
+        User user = getRequestUser(crc);
 
-        if (!systemConfig.isHTTPUpload()) {
-            return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled"));
+        // -------------------------------------
+        // (2) Get the Dataset Id
+        // -------------------------------------
+        Dataset dataset;
+
+        try {
+            dataset = findDatasetOrDie(datasetId);
+        } catch (WrappedResponse wr) {
+            return wr.getResponse();
+        }
+        JsonObject body = null;
+        if (jsonBody != null) {
+            body = JsonUtil.getJsonObject(jsonBody);
+        }
+        Set<String> fileIds = null;
+        if (downloadId != null) {
+            JsonObject files = globusService.getFilesForDownload(downloadId);
+            if (files != null) {
+                fileIds = files.keySet();
+            }
+        } else {
+            if ((body != null) && body.containsKey("fileIds")) {
+                Collection<JsonValue> fileVals = body.getJsonArray("fileIds").getValuesAs(JsonValue.class);
+                fileIds = new HashSet<>(fileVals.size());
+                for (JsonValue fileVal : fileVals) {
+                    String id = null;
+                    switch (fileVal.getValueType()) {
+                    case STRING:
+                        id = ((JsonString)
fileVal).getString(); + break; + case NUMBER: + id = ((JsonNumber) fileVal).toString(); + break; + default: + return badRequest("fileIds must be numeric or string (ids/PIDs)"); + } + ; + fileIds.add(id); + } + } else { + return badRequest("fileIds JsonArray of file ids/PIDs required in POST body"); + } + } + + if (fileIds.isEmpty()) { + return notFound(BundleUtil.getStringFromBundle("datasets.api.globusdownloadnotfound")); + } + ArrayList dataFiles = new ArrayList(fileIds.size()); + for (String id : fileIds) { + boolean published = false; + logger.info("File id: " + id); + + DataFile df = null; + try { + df = findDataFileOrDie(id); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + if (!df.getOwner().equals(dataset)) { + return badRequest("All files must be in the dataset"); + } + dataFiles.add(df); + + for (FileMetadata fm : df.getFileMetadatas()) { + if (fm.getDatasetVersion().isPublished()) { + published = true; + break; + } + } + + if (!published) { + // If the file is not published, they can still download the file, if the user + // has the permission to view unpublished versions: + + if (!permissionService.hasPermissionsFor(user, df.getOwner(), + EnumSet.of(Permission.ViewUnpublishedDataset))) { + return forbidden("User doesn't have permission to download file: " + id); + } + } else { // published and restricted and/or embargoed + if (df.isRestricted() || FileUtil.isActivelyEmbargoed(df)) + // This line also handles all three authenticated session user, token user, and + // guest cases. + if (!permissionService.hasPermissionsFor(user, df, EnumSet.of(Permission.DownloadFile))) { + return forbidden("User doesn't have permission to download file: " + id); + } + + } + } + // Allowed to download all requested files + JsonObject files = GlobusUtil.getFilesMap(dataFiles, dataset); + if (GlobusAccessibleStore.isDataverseManaged(dataset.getEffectiveStorageDriverId())) { + // If managed, give the principal read permissions + int status = globusService.setPermissionForDownload(dataset, body.getString("principal")); + switch (status) { + case 201: + return ok(files); + case 400: + return badRequest("Unable to grant permission"); + case 409: + return conflict("Permission already exists"); + default: + return error(null, "Unexpected error when granting permission"); + } + + } + + return ok(files); + } + + /** + * Monitors a globus download and removes permissions on the dir/dataset when + * the specified transfer task is completed. + * + * @param crc + * @param datasetId + * @param jsonData - a JSON Object containing the key "taskIdentifier" with the + * Globus task to monitor. 
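+     *                 A minimal sketch (the id is a placeholder): { "taskIdentifier": "globus-task-uuid" }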
+     * @return
+     * @throws IOException
+     * @throws ExecutionException
+     * @throws InterruptedException
+     */
+    @POST
+    @AuthRequired
+    @Path("{id}/monitorGlobusDownload")
+    @Consumes(MediaType.APPLICATION_JSON)
+    public Response monitorGlobusDownload(@Context ContainerRequestContext crc, @PathParam("id") String datasetId,
+            String jsonData) throws IOException, ExecutionException, InterruptedException {
+
+        logger.info(" ==== (api monitorGlobusDownload) jsonData ====== " + jsonData);
+
+        if (!systemConfig.isGlobusDownload()) {
+            return error(Response.Status.SERVICE_UNAVAILABLE,
+                    BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled"));
        }
 
        // -------------------------------------
@@ -3562,7 +4371,6 @@ public Response deleteglobusRule(@Context ContainerRequestContext crc, @PathPara
     }
 
-
     /**
      * Add multiple Files to an existing Dataset
      *
@@ -3574,9 +4382,16 @@ public Response deleteglobusRule(@Context ContainerRequestContext crc, @PathPara
     @AuthRequired
     @Path("{id}/addFiles")
     @Consumes(MediaType.MULTIPART_FORM_DATA)
-    public Response addFilesToDataset(@Context ContainerRequestContext crc,
-            @PathParam("id") String idSupplied,
-            @FormDataParam("jsonData") String jsonData) {
+    @Produces("application/json")
+    @Operation(summary = "Uploads a set of files to a dataset",
+            description = "Uploads a set of files to a dataset")
+    @APIResponse(responseCode = "200",
+            description = "Files uploaded successfully to dataset")
+    @Tag(name = "addFilesToDataset",
+            description = "Uploads a set of files to a dataset")
+    @RequestBody(content = @Content(mediaType = MediaType.MULTIPART_FORM_DATA))
+    public Response addFilesToDataset(@Context ContainerRequestContext crc, @PathParam("id") String idSupplied,
+            @FormDataParam("jsonData") String jsonData) {
 
         if (!systemConfig.isHTTPUpload()) {
             return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled"));
@@ -3642,6 +4457,14 @@ public Response addFilesToDataset(@Context ContainerRequestContext crc,
     @AuthRequired
     @Path("{id}/replaceFiles")
     @Consumes(MediaType.MULTIPART_FORM_DATA)
+    @Produces("application/json")
+    @Operation(summary = "Replaces a set of files in a dataset",
+            description = "Replaces a set of files in a dataset")
+    @APIResponse(responseCode = "200",
+            description = "Files replaced successfully in dataset")
+    @Tag(name = "replaceFilesInDataset",
+            description = "Replaces a set of files in a dataset")
+    @RequestBody(content = @Content(mediaType = MediaType.MULTIPART_FORM_DATA))
     public Response replaceFilesInDataset(@Context ContainerRequestContext crc,
                                           @PathParam("id") String idSupplied,
                                           @FormDataParam("jsonData") String jsonData) {
@@ -3784,7 +4607,7 @@ public Response getDatasetVersionArchivalStatus(@Context ContainerRequestContext
                 headers);
         if (dsv.getArchivalCopyLocation() == null) {
-            return error(Status.NO_CONTENT, "This dataset version has not been archived");
+            return error(Status.NOT_FOUND, "This dataset version has not been archived");
         } else {
             JsonObject status = JsonUtil.getJsonObject(dsv.getArchivalCopyLocation());
             return ok(status);
@@ -3930,13 +4753,10 @@ public Response getExternalToolDVParams(@Context ContainerRequestContext crc,
         }
         ApiToken apiToken = null;
         User u = getRequestUser(crc);
-        if (u instanceof AuthenticatedUser) {
-            apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) u);
-        }
-
+        apiToken = authSvc.getValidApiTokenForUser(u);
 
-        ExternalToolHandler eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale);
-        return
ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); + URLTokenUtil eth = new ExternalToolHandler(externalTool, target.getDataset(), apiToken, locale); + return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())), JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()))); } catch (WrappedResponse wr) { return wr.getResponse(); } @@ -3956,7 +4776,7 @@ public Response getDatasetSummaryFieldNames() { @GET @Path("privateUrlDatasetVersion/{privateUrlToken}") - public Response getPrivateUrlDatasetVersion(@PathParam("privateUrlToken") String privateUrlToken) { + public Response getPrivateUrlDatasetVersion(@PathParam("privateUrlToken") String privateUrlToken, @QueryParam("returnOwners") boolean returnOwners) { PrivateUrlUser privateUrlUser = privateUrlService.getPrivateUrlUserFromToken(privateUrlToken); if (privateUrlUser == null) { return notFound("Private URL user not found"); @@ -3973,9 +4793,9 @@ public Response getPrivateUrlDatasetVersion(@PathParam("privateUrlToken") String JsonObjectBuilder responseJson; if (isAnonymizedAccess) { List anonymizedFieldTypeNamesList = new ArrayList<>(Arrays.asList(anonymizedFieldTypeNames.split(",\\s"))); - responseJson = json(dsv, anonymizedFieldTypeNamesList, true); + responseJson = json(dsv, anonymizedFieldTypeNamesList, true, returnOwners); } else { - responseJson = json(dsv, true); + responseJson = json(dsv, null, true, returnOwners); } return ok(responseJson); } @@ -3995,9 +4815,17 @@ public Response getPrivateUrlDatasetVersionCitation(@PathParam("privateUrlToken" @GET @AuthRequired @Path("{id}/versions/{versionId}/citation") - public Response getDatasetVersionCitation(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + public Response getDatasetVersionCitation(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @PathParam("versionId") String versionId, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + boolean checkFilePerms = false; return response(req -> ok( - getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getCitation(true, false)), getRequestUser(crc)); + getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, + includeDeaccessioned, checkFilePerms).getCitation(true, false)), + getRequestUser(crc)); } @POST @@ -4008,7 +4836,7 @@ public Response deaccessionDataset(@Context ContainerRequestContext crc, @PathPa return badRequest(BundleUtil.getStringFromBundle("datasets.api.deaccessionDataset.invalid.version.identifier.error", List.of(DS_VERSION_LATEST_PUBLISHED))); } return response(req -> { - DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, false); + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); try { JsonObject jsonObject = JsonUtil.getJsonObject(jsonBody); datasetVersion.setVersionNote(jsonObject.getString("deaccessionReason")); @@ -4021,7 +4849,10 @@ public Response deaccessionDataset(@Context ContainerRequestContext crc, @PathPa } } execCommand(new DeaccessionDatasetVersionCommand(req, datasetVersion, false)); - return ok("Dataset " + datasetId + " deaccessioned for version " + versionId); + + return ok("Dataset " + + 
(":persistentId".equals(datasetId) ? datasetVersion.getDataset().getGlobalId().asString() : datasetId) + + " deaccessioned for version " + versionId); } catch (JsonParsingException jpe) { return error(Response.Status.BAD_REQUEST, "Error parsing Json: " + jpe.getMessage()); } @@ -4132,4 +4963,112 @@ public Response getUserPermissionsOnDataset(@Context ContainerRequestContext crc jsonObjectBuilder.add("canDeleteDatasetDraft", permissionService.userOn(requestUser, dataset).has(Permission.DeleteDatasetDraft)); return ok(jsonObjectBuilder); } + + @GET + @AuthRequired + @Path("{id}/versions/{versionId}/canDownloadAtLeastOneFile") + public Response getCanDownloadAtLeastOneFile(@Context ContainerRequestContext crc, + @PathParam("id") String datasetId, + @PathParam("versionId") String versionId, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + return response(req -> { + DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers, includeDeaccessioned); + return ok(permissionService.canDownloadAtLeastOneFile(req, datasetVersion)); + }, getRequestUser(crc)); + } + + /** + * Get the PidProvider that will be used for generating new DOIs in this dataset + * + * @return - the id of the effective PID generator for the given dataset + * @throws WrappedResponse + */ + @GET + @AuthRequired + @Path("{identifier}/pidGenerator") + public Response getPidGenerator(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, + @Context HttpHeaders headers) throws WrappedResponse { + + Dataset dataset; + + try { + dataset = findDatasetOrDie(dvIdtf); + } catch (WrappedResponse ex) { + return error(Response.Status.NOT_FOUND, "No such dataset"); + } + PidProvider pidProvider = dataset.getEffectivePidGenerator(); + if(pidProvider == null) { + //This is basically a config error, e.g. 
if a valid pid provider was removed after this dataset used it
+            return error(Response.Status.NOT_FOUND, BundleUtil.getStringFromBundle("datasets.api.pidgenerator.notfound"));
+        }
+        String pidGeneratorId = pidProvider.getId();
+        return ok(pidGeneratorId);
+    }
+
+    @PUT
+    @AuthRequired
+    @Path("{identifier}/pidGenerator")
+    public Response setPidGenerator(@Context ContainerRequestContext crc, @PathParam("identifier") String datasetId,
+            String generatorId, @Context HttpHeaders headers) throws WrappedResponse {
+
+        // Superuser-only:
+        AuthenticatedUser user;
+        try {
+            user = getRequestAuthenticatedUserOrDie(crc);
+        } catch (WrappedResponse ex) {
+            return error(Response.Status.UNAUTHORIZED, "Authentication is required.");
+        }
+        if (!user.isSuperuser()) {
+            return error(Response.Status.FORBIDDEN, "Superusers only.");
+        }
+
+        Dataset dataset;
+
+        try {
+            dataset = findDatasetOrDie(datasetId);
+        } catch (WrappedResponse ex) {
+            return error(Response.Status.NOT_FOUND, "No such dataset");
+        }
+        if (PidUtil.getManagedProviderIds().contains(generatorId)) {
+            dataset.setPidGeneratorId(generatorId);
+            datasetService.merge(dataset);
+            return ok("PID Generator set to: " + generatorId);
+        } else {
+            return error(Response.Status.NOT_FOUND, "No PID Generator found for the given id");
+        }
+
+    }
+
+    @DELETE
+    @AuthRequired
+    @Path("{identifier}/pidGenerator")
+    public Response resetPidGenerator(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf,
+            @Context HttpHeaders headers) throws WrappedResponse {
+
+        // Superuser-only:
+        AuthenticatedUser user;
+        try {
+            user = getRequestAuthenticatedUserOrDie(crc);
+        } catch (WrappedResponse ex) {
+            return error(Response.Status.UNAUTHORIZED, "Authentication is required.");
+        }
+        if (!user.isSuperuser()) {
+            return error(Response.Status.FORBIDDEN, "Superusers only.");
+        }
+
+        Dataset dataset;
+
+        try {
+            dataset = findDatasetOrDie(dvIdtf);
+        } catch (WrappedResponse ex) {
+            return error(Response.Status.NOT_FOUND, "No such dataset");
+        }
+
+        dataset.setPidGenerator(null);
+        datasetService.merge(dataset);
+        return ok("PID Generator reset to default: " + dataset.getEffectivePidGenerator().getId());
+    }
+
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
index d0711aefa5f..02b60fdb32a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java
@@ -1,25 +1,10 @@
 package edu.harvard.iq.dataverse.api;
 
-import edu.harvard.iq.dataverse.DataFile;
-import edu.harvard.iq.dataverse.Dataset;
-import edu.harvard.iq.dataverse.DatasetFieldType;
-import edu.harvard.iq.dataverse.DatasetVersion;
-import edu.harvard.iq.dataverse.Dataverse;
-import edu.harvard.iq.dataverse.DataverseFacet;
-import edu.harvard.iq.dataverse.DataverseContact;
-import edu.harvard.iq.dataverse.DataverseMetadataBlockFacet;
-import edu.harvard.iq.dataverse.DataverseServiceBean;
+import edu.harvard.iq.dataverse.*;
 import edu.harvard.iq.dataverse.api.auth.AuthRequired;
 import edu.harvard.iq.dataverse.api.datadeposit.SwordServiceBean;
 import edu.harvard.iq.dataverse.api.dto.DataverseMetadataBlockFacetDTO;
 import edu.harvard.iq.dataverse.authorization.DataverseRole;
-import edu.harvard.iq.dataverse.DvObject;
-import edu.harvard.iq.dataverse.GlobalId;
-import edu.harvard.iq.dataverse.GlobalIdServiceBean;
-import edu.harvard.iq.dataverse.GuestbookResponseServiceBean;
-import edu.harvard.iq.dataverse.GuestbookServiceBean;
-import
edu.harvard.iq.dataverse.MetadataBlock; -import edu.harvard.iq.dataverse.RoleAssignment; import edu.harvard.iq.dataverse.api.dto.ExplicitGroupDTO; import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; @@ -35,39 +20,9 @@ import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataverse.DataverseUtil; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.impl.AddRoleAssigneesToExplicitGroupCommand; -import edu.harvard.iq.dataverse.engine.command.impl.AssignRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateExplicitGroupCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.CreateRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseLinkingDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.DeleteExplicitGroupCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetRootCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDataverseStorageSizeCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetExplicitGroupCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ImportDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.LinkDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListDataverseContentCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListExplicitGroupsCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListFacetsCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListMetadataBlockFacetsCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListMetadataBlocksCommand; -import edu.harvard.iq.dataverse.engine.command.impl.ListRoleAssignments; -import edu.harvard.iq.dataverse.engine.command.impl.ListRolesCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDatasetResult; -import edu.harvard.iq.dataverse.engine.command.impl.MoveDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.PublishDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RemoveRoleAssigneesFromExplicitGroupCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RevokeRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseDefaultContributorRoleCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseMetadataBlocksCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateExplicitGroupCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetsCommand; +import edu.harvard.iq.dataverse.engine.command.impl.*; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -80,23 +35,14 @@ import edu.harvard.iq.dataverse.util.json.JsonPrinter; import edu.harvard.iq.dataverse.util.json.JsonUtil; -import static 
edu.harvard.iq.dataverse.util.json.JsonPrinter.brief; -import java.util.Collections; -import java.util.LinkedList; -import java.util.List; -import java.util.TreeSet; +import java.io.StringReader; +import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; import jakarta.ejb.EJB; import jakarta.ejb.EJBException; import jakarta.ejb.Stateless; -import jakarta.json.Json; -import jakarta.json.JsonArrayBuilder; -import jakarta.json.JsonNumber; -import jakarta.json.JsonObject; -import jakarta.json.JsonObjectBuilder; -import jakarta.json.JsonString; -import jakarta.json.JsonValue; +import jakarta.json.*; import jakarta.json.JsonValue.ValueType; import jakarta.json.stream.JsonParsingException; import jakarta.validation.ConstraintViolationException; @@ -120,13 +66,8 @@ import java.io.OutputStream; import java.text.MessageFormat; import java.text.SimpleDateFormat; -import java.util.Arrays; -import java.util.Date; -import java.util.Map; -import java.util.Optional; import java.util.stream.Collectors; import jakarta.servlet.http.HttpServletResponse; -import jakarta.validation.constraints.NotNull; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.StreamingOutput; @@ -162,6 +103,12 @@ public class Dataverses extends AbstractApiBean { @EJB DataverseServiceBean dataverseService; + @EJB + DataverseLinkingServiceBean linkingService; + + @EJB + FeaturedDataverseServiceBean featuredDataverseService; + @EJB SwordServiceBean swordService; @@ -232,6 +179,40 @@ public Response addDataverse(@Context ContainerRequestContext crc, String body, } } + + @POST + @AuthRequired + @Path("{identifier}/validateDatasetJson") + @Consumes("application/json") + public Response validateDatasetJson(@Context ContainerRequestContext crc, String body, @PathParam("identifier") String idtf) { + User u = getRequestUser(crc); + try { + String validationMessage = execCommand(new ValidateDatasetJsonCommand(createDataverseRequest(u), findDataverseOrDie(idtf), body)); + return ok(validationMessage); + } catch (WrappedResponse ex) { + Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, ex); + return ex.getResponse(); + } + } + + @GET + @AuthRequired + @Path("{identifier}/datasetSchema") + @Produces(MediaType.APPLICATION_JSON) + public Response getDatasetSchema(@Context ContainerRequestContext crc, @PathParam("identifier") String idtf) { + User u = getRequestUser(crc); + + try { + String datasetSchema = execCommand(new GetDatasetSchemaCommand(createDataverseRequest(u), findDataverseOrDie(idtf))); + JsonObject jsonObject = JsonUtil.getJsonObject(datasetSchema); + return Response.ok(jsonObject).build(); + } catch (WrappedResponse ex) { + Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, ex); + return ex.getResponse(); + } + } + + @POST @AuthRequired @@ -382,7 +363,7 @@ public Response importDataset(@Context ContainerRequestContext crc, String jsonB if (!GlobalId.verifyImportCharacters(pidParam)) { return badRequest("PID parameter contains characters that are not allowed by the Dataverse application. 
On import, the PID must only contain characters specified in this regex: " + BundleUtil.getStringFromBundle("pid.allowedCharacters")); } - Optional maybePid = GlobalIdServiceBean.parse(pidParam); + Optional maybePid = PidProvider.parse(pidParam); if (maybePid.isPresent()) { ds.setGlobalId(maybePid.get()); } else { @@ -457,7 +438,7 @@ public Response importDatasetDdi(@Context ContainerRequestContext crc, String xm if (!GlobalId.verifyImportCharacters(pidParam)) { return badRequest("PID parameter contains characters that are not allowed by the Dataverse application. On import, the PID must only contain characters specified in this regex: " + BundleUtil.getStringFromBundle("pid.allowedCharacters")); } - Optional maybePid = GlobalIdServiceBean.parse(pidParam); + Optional maybePid = PidProvider.parse(pidParam); if (maybePid.isPresent()) { ds.setGlobalId(maybePid.get()); } else { @@ -520,12 +501,10 @@ public Response recreateDataset(@Context ContainerRequestContext crc, String jso ds.setOwner(owner); ds = JSONLDUtil.updateDatasetMDFromJsonLD(ds, jsonLDBody, metadataBlockSvc, datasetFieldSvc, false, true, licenseSvc); //ToDo - verify PID is one Dataverse can manage (protocol/authority/shoulder match) - if(! - (ds.getAuthority().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Authority))&& - ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol))&& - ds.getIdentifier().startsWith(settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder)))) { - throw new BadRequestException("Cannot recreate a dataset that has a PID that doesn't match the server's settings"); - } + if (!PidUtil.getPidProvider(ds.getGlobalId().getProviderId()).canManagePID()) { + throw new BadRequestException( + "Cannot recreate a dataset that has a PID that doesn't match the server's settings"); + } if(!dvObjectSvc.isGlobalIdLocallyUnique(ds.getGlobalId())) { throw new BadRequestException("Cannot recreate a dataset whose PID is already in use"); } @@ -571,10 +550,11 @@ private Dataset parseDataset(String datasetJson) throws WrappedResponse { @GET @AuthRequired @Path("{identifier}") - public Response viewDataverse(@Context ContainerRequestContext crc, @PathParam("identifier") String idtf) { + public Response getDataverse(@Context ContainerRequestContext crc, @PathParam("identifier") String idtf, @QueryParam("returnOwners") boolean returnOwners) { return response(req -> ok( json(execCommand(new GetDataverseCommand(req, findDataverseOrDie(idtf))), - settingsService.isTrueForKey(SettingsServiceBean.Key.ExcludeEmailFromExport, false) + settingsService.isTrueForKey(SettingsServiceBean.Key.ExcludeEmailFromExport, false), + returnOwners )), getRequestUser(crc)); } @@ -657,6 +637,43 @@ public Response updateAttribute(@Context ContainerRequestContext crc, @PathParam } } + @PUT + @AuthRequired + @Path("{identifier}/inputLevels") + public Response updateInputLevels(@Context ContainerRequestContext crc, @PathParam("identifier") String identifier, String jsonBody) { + try { + Dataverse dataverse = findDataverseOrDie(identifier); + List newInputLevels = parseInputLevels(jsonBody, dataverse); + execCommand(new UpdateDataverseInputLevelsCommand(dataverse, createDataverseRequest(getRequestUser(crc)), newInputLevels)); + return ok(BundleUtil.getStringFromBundle("dataverse.update.success"), JsonPrinter.json(dataverse)); + } catch (WrappedResponse e) { + return e.getResponse(); + } + } + + private List parseInputLevels(String jsonBody, Dataverse dataverse) throws WrappedResponse { + JsonArray 
inputLevelsArray = Json.createReader(new StringReader(jsonBody)).readArray(); + + List<DataverseFieldTypeInputLevel> newInputLevels = new ArrayList<>(); + for (JsonValue value : inputLevelsArray) { + JsonObject inputLevel = (JsonObject) value; + String datasetFieldTypeName = inputLevel.getString("datasetFieldTypeName"); + DatasetFieldType datasetFieldType = datasetFieldSvc.findByName(datasetFieldTypeName); + + if (datasetFieldType == null) { + String errorMessage = MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.updateinputlevels.error.invalidfieldtypename"), datasetFieldTypeName); + throw new WrappedResponse(badRequest(errorMessage)); + } + + boolean required = inputLevel.getBoolean("required"); + boolean include = inputLevel.getBoolean("include"); + + newInputLevels.add(new DataverseFieldTypeInputLevel(datasetFieldType, dataverse, required, include)); + } + + return newInputLevels; + } + @DELETE @AuthRequired @Path("{linkingDataverseId}/deleteLink/{linkedDataverseId}") @@ -671,14 +688,20 @@ public Response deleteDataverseLinkingDataverse(@Context ContainerRequestContext @GET @AuthRequired @Path("{identifier}/metadatablocks") - public Response listMetadataBlocks(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf) { + public Response listMetadataBlocks(@Context ContainerRequestContext crc, + @PathParam("identifier") String dvIdtf, + @QueryParam("onlyDisplayedOnCreate") boolean onlyDisplayedOnCreate, + @QueryParam("returnDatasetFieldTypes") boolean returnDatasetFieldTypes) { try { - JsonArrayBuilder arr = Json.createArrayBuilder(); - final List<MetadataBlock> blocks = execCommand(new ListMetadataBlocksCommand(createDataverseRequest(getRequestUser(crc)), findDataverseOrDie(dvIdtf))); - for (MetadataBlock mdb : blocks) { - arr.add(brief.json(mdb)); - } - return ok(arr); + Dataverse dataverse = findDataverseOrDie(dvIdtf); + final List<MetadataBlock> metadataBlocks = execCommand( + new ListMetadataBlocksCommand( + createDataverseRequest(getRequestUser(crc)), + dataverse, + onlyDisplayedOnCreate + ) + ); + return ok(json(metadataBlocks, returnDatasetFieldTypes, onlyDisplayedOnCreate, dataverse)); } catch (WrappedResponse we) { return we.getResponse(); } @@ -782,6 +805,111 @@ public Response listFacets(@Context ContainerRequestContext crc, @PathParam("ide } } + + @GET + @AuthRequired + @Path("{identifier}/featured") + /* + Allows a user to get the collections that are featured by a given collection; + probably more useful for the SPA than for end users. + */ + public Response getFeaturedDataverses(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, String dvAliases) { + + try { + User u = getRequestUser(crc); + DataverseRequest r = createDataverseRequest(u); + Dataverse dataverse = findDataverseOrDie(dvIdtf); + JsonArrayBuilder fs = Json.createArrayBuilder(); + for (Dataverse f : execCommand(new ListFeaturedCollectionsCommand(r, dataverse))) { + fs.add(f.getAlias()); + } + return ok(fs); + } catch (WrappedResponse e) { + return e.getResponse(); + } + } + + + @POST + @AuthRequired + @Path("{identifier}/featured") + /** + * Allows a user to set featured dataverses - must have edit dataverse permission + * + */ + public Response setFeaturedDataverses(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, String dvAliases) { + List<Dataverse> dvsFromInput = new LinkedList<>(); + + + try { + + for (JsonString dvAlias : Util.asJsonArray(dvAliases).getValuesAs(JsonString.class)) { + Dataverse dvToBeFeatured = dataverseService.findByAlias(dvAlias.getString()); + if (dvToBeFeatured == null) { + return
error(Response.Status.BAD_REQUEST, "Can't find dataverse collection with alias '" + dvAlias + "'"); + } + dvsFromInput.add(dvToBeFeatured); + } + + if (dvsFromInput.isEmpty()) { + return error(Response.Status.BAD_REQUEST, "Please provide a valid Json array of dataverse collection aliases to be featured."); + } + + Dataverse dataverse = findDataverseOrDie(dvIdtf); + List featuredSource = new ArrayList<>(); + List featuredTarget = new ArrayList<>(); + featuredSource.addAll(dataverseService.findAllPublishedByOwnerId(dataverse.getId())); + featuredSource.addAll(linkingService.findLinkedDataverses(dataverse.getId())); + List featuredList = featuredDataverseService.findByDataverseId(dataverse.getId()); + + if (featuredSource.isEmpty()) { + return error(Response.Status.BAD_REQUEST, "There are no collections avaialble to be featured in Dataverse collection '" + dataverse.getDisplayName() + "'."); + } + + for (DataverseFeaturedDataverse dfd : featuredList) { + Dataverse fd = dfd.getFeaturedDataverse(); + featuredTarget.add(fd); + featuredSource.remove(fd); + } + + for (Dataverse test : dvsFromInput) { + if (featuredTarget.contains(test)) { + return error(Response.Status.BAD_REQUEST, "Dataverse collection '" + test.getDisplayName() + "' is already featured in Dataverse collection '" + dataverse.getDisplayName() + "'."); + } + + if (featuredSource.contains(test)) { + featuredTarget.add(test); + } else { + return error(Response.Status.BAD_REQUEST, "Dataverse collection '" + test.getDisplayName() + "' may not be featured in Dataverse collection '" + dataverse.getDisplayName() + "'."); + } + + } + // by passing null for Facets and DataverseFieldTypeInputLevel, those are not changed + execCommand(new UpdateDataverseCommand(dataverse, null, featuredTarget, createDataverseRequest(getRequestUser(crc)), null)); + return ok("Featured Dataverses of dataverse " + dvIdtf + " updated."); + + } catch (WrappedResponse ex) { + return ex.getResponse(); + } catch (JsonParsingException jpe){ + return error(Response.Status.BAD_REQUEST, "Please provide a valid Json array of dataverse collection aliases to be featured."); + } + + } + + @DELETE + @AuthRequired + @Path("{identifier}/featured") + public Response deleteFeaturedCollections(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf) throws WrappedResponse { + try { + Dataverse dataverse = findDataverseOrDie(dvIdtf); + List featuredTarget = new ArrayList<>(); + execCommand(new UpdateDataverseCommand(dataverse, null, featuredTarget, createDataverseRequest(getRequestUser(crc)), null)); + return ok(BundleUtil.getStringFromBundle("dataverses.api.delete.featured.collections.successful")); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + @POST @AuthRequired @Path("{identifier}/facets") @@ -937,7 +1065,62 @@ public Response getStorageSize(@Context ContainerRequestContext crc, @PathParam( execCommand(new GetDataverseStorageSizeCommand(req, findDataverseOrDie(dvIdtf), includeCached)))), getRequestUser(crc)); } + @GET + @AuthRequired + @Path("{identifier}/storage/quota") + public Response getCollectionQuota(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf) throws WrappedResponse { + try { + Long bytesAllocated = execCommand(new GetCollectionQuotaCommand(createDataverseRequest(getRequestUser(crc)), findDataverseOrDie(dvIdtf))); + if (bytesAllocated != null) { + return ok(MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.storage.quota.allocation"),bytesAllocated)); + } + return 
ok(BundleUtil.getStringFromBundle("dataverse.storage.quota.notdefined")); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + + @POST + @AuthRequired + @Path("{identifier}/storage/quota/{bytesAllocated}") + public Response setCollectionQuota(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf, @PathParam("bytesAllocated") Long bytesAllocated) throws WrappedResponse { + try { + execCommand(new SetCollectionQuotaCommand(createDataverseRequest(getRequestUser(crc)), findDataverseOrDie(dvIdtf), bytesAllocated)); + return ok(BundleUtil.getStringFromBundle("dataverse.storage.quota.updated")); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + + @DELETE + @AuthRequired + @Path("{identifier}/storage/quota") + public Response deleteCollectionQuota(@Context ContainerRequestContext crc, @PathParam("identifier") String dvIdtf) throws WrappedResponse { + try { + execCommand(new DeleteCollectionQuotaCommand(createDataverseRequest(getRequestUser(crc)), findDataverseOrDie(dvIdtf))); + return ok(BundleUtil.getStringFromBundle("dataverse.storage.quota.deleted")); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + /** + * + * @param crc + * @param identifier + * @return + * @throws edu.harvard.iq.dataverse.api.AbstractApiBean.WrappedResponse + * @todo: add an optional parameter that would force the recorded storage use + * to be recalculated (or should that be a POST version of this API?) + */ + @GET + @AuthRequired + @Path("{identifier}/storage/use") + public Response getCollectionStorageUse(@Context ContainerRequestContext crc, @PathParam("identifier") String identifier) throws WrappedResponse { + return response(req -> ok(MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.storage.use"), + execCommand(new GetCollectionStorageUseCommand(req, findDataverseOrDie(identifier))))), getRequestUser(crc)); + } + @GET @AuthRequired @Path("{identifier}/roles") diff --git a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java index af681234e82..c815caa09eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java @@ -103,8 +103,10 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] String auxiliaryTag = null; String auxiliaryType = null; String auxiliaryFileName = null; + // Before we do anything else, check if this download can be handled // by a redirect to remote storage (only supported on S3, as of 5.4): + if (storageIO.downloadRedirectEnabled()) { // Even if the above is true, there are a few cases where a @@ -158,7 +160,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } } else if (dataFile.isTabularData()) { - // Many separate special cases here. + // Many separate special cases here. if (di.getConversionParam() != null) { if (di.getConversionParam().equals("format")) { @@ -179,12 +181,26 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] redirectSupported = false; } } - } else if (!di.getConversionParam().equals("noVarHeader")) { - // This is a subset request - can't do. + } else if (di.getConversionParam().equals("noVarHeader")) { + // This will work just fine, if the tab. file is + // stored without the var. header. Throw "unavailable" + // exception otherwise. + // @todo: should we actually drop support for this "noVarHeader" flag? 
+ if (dataFile.getDataTable().isStoredWithVariableHeader()) { + throw new ServiceUnavailableException(); + } + // ... defaults to redirectSupported = true + } else { + // This must be a subset request then - can't do. + redirectSupported = false; + } + } else { + // "straight" download of the full tab-delimited file. + // can redirect, but only if stored with the variable + // header already added: + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { redirectSupported = false; } - } else { - redirectSupported = false; } } } @@ -206,14 +222,15 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] redirect_url_str = null; } } - - if (systemConfig.isGlobusFileDownload() && systemConfig.getGlobusStoresList() - .contains(DataAccess.getStorageDriverFromIdentifier(dataFile.getStorageIdentifier()))) { + String driverId = DataAccess.getStorageDriverFromIdentifier(dataFile.getStorageIdentifier()); + if (systemConfig.isGlobusFileDownload() && (GlobusAccessibleStore.acceptsGlobusTransfers(driverId) || GlobusAccessibleStore.allowsGlobusReferences(driverId))) { if (di.getConversionParam() != null) { if (di.getConversionParam().equals("format")) { if ("GlobusTransfer".equals(di.getConversionParamValue())) { - redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, dataFile); + List downloadDFList = new ArrayList(1); + downloadDFList.add(dataFile); + redirect_url_str = globusService.getGlobusAppUrlForDataset(dataFile.getOwner(), false, downloadDFList); } } } @@ -245,11 +262,16 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // finally, issue the redirect: Response response = Response.seeOther(redirect_uri).build(); logger.fine("Issuing redirect to the file location."); + // Yes, this throws an exception. It's not an exception + // as in, "bummer, something went wrong". This is how a + // redirect is produced here! throw new RedirectionException(response); } throw new ServiceUnavailableException(); } + // Past this point, this is a locally served/streamed download + if (di.getConversionParam() != null) { // Image Thumbnail and Tabular data conversion: // NOTE: only supported on local files, as of 4.0.2! @@ -261,7 +283,7 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] storageIO = ImageThumbConverter.getImageThumbnailAsInputStream(storageIO, ImageThumbConverter.DEFAULT_THUMBNAIL_SIZE); } else { try { - int size = new Integer(di.getConversionParamValue()); + int size = Integer.parseInt(di.getConversionParamValue()); if (size > 0) { storageIO = ImageThumbConverter.getImageThumbnailAsInputStream(storageIO, size); } @@ -272,8 +294,10 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // and, since we now have tabular data files that can // have thumbnail previews... obviously, we don't want to // add the variable header to the image stream! - storageIO.setNoVarHeader(Boolean.TRUE); - storageIO.setVarHeader(null); + if (storageIO != null) { // ImageThumbConverter returns null if thumbnail conversion fails + storageIO.setNoVarHeader(Boolean.TRUE); + storageIO.setVarHeader(null); + } } } else if (dataFile.isTabularData()) { logger.fine("request for tabular data download;"); @@ -283,9 +307,14 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] // request any tabular-specific services. 
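+ // (Assumed from the Access API conventions rather than this change: these
+ // branches are typically reached via requests such as
+ // /api/access/datafile/{id}?noVarHeader=true or ...?format=original.)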
if (di.getConversionParam().equals("noVarHeader")) { - logger.fine("tabular data with no var header requested"); - storageIO.setNoVarHeader(Boolean.TRUE); - storageIO.setVarHeader(null); + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { + logger.fine("tabular data with no var header requested"); + storageIO.setNoVarHeader(Boolean.TRUE); + storageIO.setVarHeader(null); + } else { + logger.fine("can't serve request for tabular data without varheader, since stored with it"); + throw new ServiceUnavailableException(); + } } else if (di.getConversionParam().equals("format")) { // Conversions, and downloads of "stored originals" are // now supported on all DataFiles for which StorageIO @@ -327,11 +356,10 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] if (variable.getDataTable().getDataFile().getId().equals(dataFile.getId())) { logger.fine("adding variable id " + variable.getId() + " to the list."); variablePositionIndex.add(variable.getFileOrder()); - if (subsetVariableHeader == null) { - subsetVariableHeader = variable.getName(); - } else { - subsetVariableHeader = subsetVariableHeader.concat("\t"); - subsetVariableHeader = subsetVariableHeader.concat(variable.getName()); + if (!dataFile.getDataTable().isStoredWithVariableHeader()) { + subsetVariableHeader = subsetVariableHeader == null + ? variable.getName() + : subsetVariableHeader.concat("\t" + variable.getName()); } } else { logger.warning("variable does not belong to this data file."); @@ -344,7 +372,17 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] try { File tempSubsetFile = File.createTempFile("tempSubsetFile", ".tmp"); TabularSubsetGenerator tabularSubsetGenerator = new TabularSubsetGenerator(); - tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), tempSubsetFile.getAbsolutePath(), variablePositionIndex, dataFile.getDataTable().getCaseQuantity(), "\t"); + + long numberOfLines = dataFile.getDataTable().getCaseQuantity(); + if (dataFile.getDataTable().isStoredWithVariableHeader()) { + numberOfLines++; + } + + tabularSubsetGenerator.subsetFile(storageIO.getInputStream(), + tempSubsetFile.getAbsolutePath(), + variablePositionIndex, + numberOfLines, + "\t"); if (tempSubsetFile.exists()) { FileInputStream subsetStream = new FileInputStream(tempSubsetFile); @@ -352,8 +390,11 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] InputStreamIO subsetStreamIO = new InputStreamIO(subsetStream, subsetSize); logger.fine("successfully created subset output stream."); - subsetVariableHeader = subsetVariableHeader.concat("\n"); - subsetStreamIO.setVarHeader(subsetVariableHeader); + + if (subsetVariableHeader != null) { + subsetVariableHeader = subsetVariableHeader.concat("\n"); + subsetStreamIO.setVarHeader(subsetVariableHeader); + } String tabularFileName = storageIO.getFileName(); @@ -378,8 +419,13 @@ public void writeTo(DownloadInstance di, Class clazz, Type type, Annotation[] } else { logger.fine("empty list of extra arguments."); } + // end of tab. data subset case + } else if (dataFile.getDataTable().isStoredWithVariableHeader()) { + logger.fine("tabular file stored with the var header included, no need to generate it on the fly"); + storageIO.setNoVarHeader(Boolean.TRUE); + storageIO.setVarHeader(null); } - } + } // end of tab. 
data file case if (storageIO == null) { //throw new WebApplicationException(Response.Status.SERVICE_UNAVAILABLE); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/FeedbackApi.java b/src/main/java/edu/harvard/iq/dataverse/api/FeedbackApi.java index 8a178f8da62..56c5ca95ce6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/FeedbackApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/FeedbackApi.java @@ -7,9 +7,6 @@ import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.feedback.Feedback; import edu.harvard.iq.dataverse.feedback.FeedbackUtil; -import edu.harvard.iq.dataverse.settings.JvmSettings; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import edu.harvard.iq.dataverse.util.MailUtil; import jakarta.ejb.EJB; import jakarta.json.Json; @@ -40,7 +37,7 @@ public class FeedbackApi extends AbstractApiBean { * user input (e.g. to strip potentially malicious html, etc.)!!!! **/ @POST - public Response submitFeedback(JsonObject jsonObject) throws AddressException { + public Response submitFeedback(JsonObject jsonObject) { JsonNumber jsonNumber = jsonObject.getJsonNumber("targetId"); DvObject feedbackTarget = null; if (jsonNumber != null) { @@ -51,8 +48,7 @@ public Response submitFeedback(JsonObject jsonObject) throws AddressException { } DataverseSession dataverseSession = null; String userMessage = jsonObject.getString("body"); - String systemEmail = JvmSettings.SUPPORT_EMAIL.lookupOptional().orElse(settingsSvc.getValueForKey(SettingsServiceBean.Key.SystemEmail)); - InternetAddress systemAddress = MailUtil.parseSystemAddress(systemEmail); + InternetAddress systemAddress = mailService.getSupportAddress().orElse(null); String userEmail = jsonObject.getString("fromEmail"); String messageSubject = jsonObject.getString("subject"); String baseUrl = systemConfig.getDataverseSiteUrl(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index ad24d81d996..d786aab35a8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -2,22 +2,7 @@ import com.google.gson.Gson; import com.google.gson.JsonObject; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.DataFileTag; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetLock; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DatasetVersionServiceBean; -import edu.harvard.iq.dataverse.DataverseRequestServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.EjbDataverseEngine; -import edu.harvard.iq.dataverse.FileDownloadServiceBean; -import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; -import edu.harvard.iq.dataverse.TermsOfUseAndAccessValidator; -import edu.harvard.iq.dataverse.UserNotificationServiceBean; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.api.auth.AuthRequired; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.ApiToken; @@ -27,15 +12,11 @@ import edu.harvard.iq.dataverse.datasetutility.DataFileTagException; import edu.harvard.iq.dataverse.datasetutility.NoFilesException; import edu.harvard.iq.dataverse.datasetutility.OptionalFileParams; +import 
edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import edu.harvard.iq.dataverse.engine.command.impl.GetDataFileCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetDraftFileMetadataIfAvailableCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RedetectFileTypeCommand; -import edu.harvard.iq.dataverse.engine.command.impl.RestrictFileCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UningestFileCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; +import edu.harvard.iq.dataverse.engine.command.impl.*; import edu.harvard.iq.dataverse.export.ExportService; import io.gdcc.spi.export.ExportException; import edu.harvard.iq.dataverse.externaltools.ExternalTool; @@ -49,6 +30,10 @@ import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + +import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import static edu.harvard.iq.dataverse.api.Datasets.handleVersion; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; @@ -64,10 +49,7 @@ import jakarta.ejb.EJB; import jakarta.ejb.EJBException; import jakarta.inject.Inject; -import jakarta.json.Json; -import jakarta.json.JsonArray; -import jakarta.json.JsonString; -import jakarta.json.JsonValue; +import jakarta.json.*; import jakarta.json.stream.JsonParsingException; import jakarta.servlet.http.HttpServletResponse; import jakarta.ws.rs.*; @@ -82,6 +64,13 @@ import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; import jakarta.ws.rs.core.UriInfo; + +import org.eclipse.microprofile.openapi.annotations.Operation; +import org.eclipse.microprofile.openapi.annotations.media.Content; +import org.eclipse.microprofile.openapi.annotations.media.Schema; +import org.eclipse.microprofile.openapi.annotations.parameters.RequestBody; +import org.eclipse.microprofile.openapi.annotations.responses.APIResponse; +import org.eclipse.microprofile.openapi.annotations.tags.Tag; import org.glassfish.jersey.media.multipart.FormDataBodyPart; import org.glassfish.jersey.media.multipart.FormDataContentDisposition; import org.glassfish.jersey.media.multipart.FormDataParam; @@ -194,6 +183,14 @@ public Response restrictFileInDataset(@Context ContainerRequestContext crc, @Pat @AuthRequired @Path("{id}/replace") @Consumes(MediaType.MULTIPART_FORM_DATA) + @Produces("application/json") + @Operation(summary = "Replace a file in a dataset", + description = "Replace a file in a dataset") + @APIResponse(responseCode = "200", + description = "File replaced successfully in the dataset") + @Tag(name = "replaceFilesInDataset", + description = "Replace a file in a dataset") + @RequestBody(content = @Content(mediaType = MediaType.MULTIPART_FORM_DATA)) public Response replaceFileInDataset( @Context ContainerRequestContext crc, @PathParam("id") String fileIdOrPersistentId, @@ -228,10 +225,10 @@ public Response replaceFileInDataset( // - Will skip extra attributes which includes fileToReplaceId and forceReplace optionalFileParams = new OptionalFileParams(jsonData); } catch (DataFileTagException ex) { - return
error(Response.Status.BAD_REQUEST, ex.getMessage()); + return error(BAD_REQUEST, ex.getMessage()); } } catch (ClassCastException | com.google.gson.JsonParseException ex) { - return error(Response.Status.BAD_REQUEST, BundleUtil.getStringFromBundle("file.addreplace.error.parsing")); + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("file.addreplace.error.parsing")); } } @@ -320,7 +317,7 @@ public Response replaceFileInDataset( //"Look at that! You added a file! (hey hey, it may have worked)"); } catch (NoFilesException ex) { Logger.getLogger(Files.class.getName()).log(Level.SEVERE, null, ex); - return error(Response.Status.BAD_REQUEST, "NoFileException! Serious Error! See administrator!"); + return error(BAD_REQUEST, "NoFileException! Serious Error! See administrator!"); } } @@ -410,7 +407,7 @@ public Response updateFileMetadata(@Context ContainerRequestContext crc, @FormDa //we get the data file to do a permissions check, if this fails it'll go to the WrappedResponse below for an ugly unpermitted error execCommand(new GetDataFileCommand(req, findDataFileOrDie(result.get(0).toString()))); - return error(Response.Status.BAD_REQUEST, "You cannot edit metadata on a dataFile that has been replaced. Please try again with the newest file id."); + return error(BAD_REQUEST, "You cannot edit metadata on a dataFile that has been replaced. Please try again with the newest file id."); } // (2) Check/Parse the JSON (if uploaded) @@ -432,10 +429,10 @@ public Response updateFileMetadata(@Context ContainerRequestContext crc, @FormDa // - Will skip extra attributes which includes fileToReplaceId and forceReplace optionalFileParams = new OptionalFileParams(jsonData); } catch (DataFileTagException ex) { - return error(Response.Status.BAD_REQUEST, ex.getMessage()); + return error(BAD_REQUEST, ex.getMessage()); } } catch (ClassCastException | com.google.gson.JsonParseException ex) { - return error(Response.Status.BAD_REQUEST, BundleUtil.getStringFromBundle("file.addreplace.error.parsing")); + return error(BAD_REQUEST, BundleUtil.getStringFromBundle("file.addreplace.error.parsing")); } } @@ -456,7 +453,7 @@ public Response updateFileMetadata(@Context ContainerRequestContext crc, @FormDa } if (upFmd == null){ - return error(Response.Status.BAD_REQUEST, "An error has occurred attempting to update the requested DataFile. It is not part of the current version of the Dataset."); + return error(BAD_REQUEST, "An error has occurred attempting to update the requested DataFile. 
It is not part of the current version of the Dataset."); } jakarta.json.JsonObject jsonObject = JsonUtil.getJsonObject(jsonData); @@ -487,7 +484,7 @@ public Response updateFileMetadata(@Context ContainerRequestContext crc, @FormDa } } catch (WrappedResponse wr) { - return error(Response.Status.BAD_REQUEST, "An error has occurred attempting to update the requested DataFile, likely related to permissions."); + return error(BAD_REQUEST, "An error has occurred attempting to update the requested DataFile, likely related to permissions."); } String jsonString = upFmd.asGsonObject(true).toString(); @@ -498,79 +495,82 @@ public Response updateFileMetadata(@Context ContainerRequestContext crc, @FormDa .type(MediaType.TEXT_PLAIN) //Our plain text string is already json .build(); } - + @GET @AuthRequired - @Path("{id}/draft") - public Response getFileDataDraft(@Context ContainerRequestContext crc, @PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WrappedResponse, Exception { - return getFileDataResponse(getRequestUser(crc), fileIdOrPersistentId, uriInfo, headers, response, true); + @Path("{id}") + public Response getFileData(@Context ContainerRequestContext crc, + @PathParam("id") String fileIdOrPersistentId, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @QueryParam("returnDatasetVersion") boolean returnDatasetVersion, + @QueryParam("returnOwners") boolean returnOwners, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + return response( req -> getFileDataResponse(req, fileIdOrPersistentId, DS_VERSION_LATEST, includeDeaccessioned, returnDatasetVersion, returnOwners, uriInfo, headers), getRequestUser(crc)); } - + @GET @AuthRequired - @Path("{id}") - public Response getFileData(@Context ContainerRequestContext crc, @PathParam("id") String fileIdOrPersistentId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WrappedResponse, Exception { - return getFileDataResponse(getRequestUser(crc), fileIdOrPersistentId, uriInfo, headers, response, false); + @Path("{id}/versions/{datasetVersionId}") + public Response getFileDataForVersion(@Context ContainerRequestContext crc, + @PathParam("id") String fileIdOrPersistentId, + @PathParam("datasetVersionId") String datasetVersionId, + @QueryParam("includeDeaccessioned") boolean includeDeaccessioned, + @QueryParam("returnDatasetVersion") boolean returnDatasetVersion, + @QueryParam("returnOwners") boolean returnOwners, + @Context UriInfo uriInfo, + @Context HttpHeaders headers) { + return response( req -> getFileDataResponse(req, fileIdOrPersistentId, datasetVersionId, includeDeaccessioned, returnDatasetVersion, returnOwners, uriInfo, headers), getRequestUser(crc)); } - - private Response getFileDataResponse(User user, String fileIdOrPersistentId, UriInfo uriInfo, HttpHeaders headers, HttpServletResponse response, boolean draft ){ - - DataverseRequest req; - try { - req = createDataverseRequest(user); - } catch (Exception e) { - return error(BAD_REQUEST, "Error attempting to request information. 
Maybe a bad API token?"); - } - final DataFile df; - try { - df = execCommand(new GetDataFileCommand(req, findDataFileOrDie(fileIdOrPersistentId))); - } catch (Exception e) { - return error(BAD_REQUEST, "Error attempting get the requested data file."); - } - FileMetadata fm; + private Response getFileDataResponse(final DataverseRequest req, + String fileIdOrPersistentId, + String datasetVersionId, + boolean includeDeaccessioned, + boolean returnDatasetVersion, + boolean returnOwners, + UriInfo uriInfo, + HttpHeaders headers) throws WrappedResponse { + final DataFile dataFile = execCommand(new GetDataFileCommand(req, findDataFileOrDie(fileIdOrPersistentId))); + FileMetadata fileMetadata = execCommand(handleVersion(datasetVersionId, new Datasets.DsVersionHandler<>() { + @Override + public Command handleLatest() { + return new GetLatestAccessibleFileMetadataCommand(req, dataFile, includeDeaccessioned); + } - if (draft) { - try { - fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, df)); - } catch (WrappedResponse w) { - return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); + @Override + public Command handleDraft() { + return new GetDraftFileMetadataIfAvailableCommand(req, dataFile); } - if (null == fm) { - return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); + + @Override + public Command handleSpecific(long major, long minor) { + return new GetSpecificPublishedFileMetadataByDatasetVersionCommand(req, dataFile, major, minor, includeDeaccessioned); } - } else { - //first get latest published - //if not available get draft if permissible - try { - fm = df.getLatestPublishedFileMetadata(); - - } catch (UnsupportedOperationException e) { - try { - fm = execCommand(new GetDraftFileMetadataIfAvailableCommand(req, df)); - } catch (WrappedResponse w) { - return error(BAD_REQUEST, "An error occurred getting a draft version, you may not have permission to access unpublished data on this dataset."); - } - if (null == fm) { - return error(BAD_REQUEST, BundleUtil.getStringFromBundle("files.api.no.draft")); - } + @Override + public Command handleLatestPublished() { + return new GetLatestPublishedFileMetadataCommand(req, dataFile, includeDeaccessioned); } + })); + if (fileMetadata == null) { + throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("files.api.notFoundInVersion", Arrays.asList(fileIdOrPersistentId, datasetVersionId)))); } - - if (fm.getDatasetVersion().isReleased()) { - MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountLoggingServiceBean.MakeDataCountEntry(uriInfo, headers, dvRequestService, df); + + if (fileMetadata.getDatasetVersion().isReleased()) { + MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountLoggingServiceBean.MakeDataCountEntry(uriInfo, headers, dvRequestService, dataFile); mdcLogService.logEntry(entry); } - + return Response.ok(Json.createObjectBuilder() .add("status", ApiConstants.STATUS_OK) - .add("data", json(fm)).build()) + .add("data", json(fileMetadata, returnOwners, returnDatasetVersion)).build()) .type(MediaType.APPLICATION_JSON) .build(); } + @GET @AuthRequired @Path("{id}/metadata") @@ -635,23 +635,41 @@ public Response uningestDatafile(@Context ContainerRequestContext crc, @PathPara if (dataFile == null) { return error(Response.Status.NOT_FOUND, "File not found for given id."); } - if (!dataFile.isTabularData()) { - return error(Response.Status.BAD_REQUEST, "Cannot uningest 
non-tabular file."); - } - - try { - DataverseRequest req = createDataverseRequest(getRequestUser(crc)); - execCommand(new UningestFileCommand(req, dataFile)); - Long dataFileId = dataFile.getId(); - dataFile = fileService.find(dataFileId); - Dataset theDataset = dataFile.getOwner(); - exportDatasetMetadata(settingsService, theDataset); - return ok("Datafile " + dataFileId + " uningested."); - } catch (WrappedResponse wr) { - return wr.getResponse(); + // Ingest never succeeded, either there was a failure or this is not a tabular + // data file + // We allow anyone who can publish to uningest in order to clear a problem + if (dataFile.isIngestProblem()) { + try { + AuthenticatedUser au = getRequestAuthenticatedUserOrDie(crc); + if (!(permissionSvc.permissionsFor(au, dataFile).contains(Permission.PublishDataset))) { + return forbidden( + "Uningesting to remove an ingest problem can only be done by those who can publish the dataset"); + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + dataFile.setIngestDone(); + dataFile.setIngestReport(null); + fileService.save(dataFile); + return ok("Datafile " + dataFile.getId() + " uningested."); + } else { + return error(BAD_REQUEST, + BundleUtil.getStringFromBundle("Cannot uningest non-tabular file.")); + } + } else { + try { + DataverseRequest req = createDataverseRequest(getRequestUser(crc)); + execCommand(new UningestFileCommand(req, dataFile)); + Long dataFileId = dataFile.getId(); + dataFile = fileService.find(dataFileId); + Dataset theDataset = dataFile.getOwner(); + exportDatasetMetadata(settingsService, theDataset); + return ok("Datafile " + dataFileId + " uningested."); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } } - } // reingest attempts to queue an *existing* DataFile @@ -670,7 +688,7 @@ public Response reingest(@Context ContainerRequestContext crc, @PathParam("id") try { u = getRequestAuthenticatedUserOrDie(crc); if (!u.isSuperuser()) { - return error(Response.Status.FORBIDDEN, "This API call can be used by superusers only"); + return error(FORBIDDEN, "This API call can be used by superusers only"); } } catch (WrappedResponse wr) { return wr.getResponse(); @@ -686,21 +704,21 @@ public Response reingest(@Context ContainerRequestContext crc, @PathParam("id") Dataset dataset = dataFile.getOwner(); if (dataset == null) { - return error(Response.Status.BAD_REQUEST, "Failed to locate the parent dataset for the datafile."); + return error(BAD_REQUEST, "Failed to locate the parent dataset for the datafile."); } if (dataFile.isTabularData()) { - return error(Response.Status.BAD_REQUEST, "The datafile is already ingested as Tabular."); + return error(BAD_REQUEST, "The datafile is already ingested as Tabular."); } boolean ingestLock = dataset.isLockedFor(DatasetLock.Reason.Ingest); if (ingestLock) { - return error(Response.Status.FORBIDDEN, "Dataset already locked with an Ingest lock"); + return error(FORBIDDEN, "Dataset already locked with an Ingest lock"); } if (!FileUtil.canIngestAsTabular(dataFile)) { - return error(Response.Status.BAD_REQUEST, "Tabular ingest is not supported for this file type (id: "+id+", type: "+dataFile.getContentType()+")"); + return error(BAD_REQUEST, "Tabular ingest is not supported for this file type (id: "+id+", type: "+dataFile.getContentType()+")"); } dataFile.SetIngestScheduled(); @@ -740,7 +758,7 @@ public Response redetectDatafile(@Context ContainerRequestContext crc, @PathPara // Ingested Files have mimetype = text/tab-separated-values // No need to redetect if 
(dataFileIn.isTabularData()) { - return error(Response.Status.BAD_REQUEST, "The file is an ingested tabular file."); + return error(BAD_REQUEST, "The file is an ingested tabular file."); } String originalContentType = dataFileIn.getContentType(); DataFile dataFileOut = execCommand(new RedetectFileTypeCommand(createDataverseRequest(getRequestUser(crc)), dataFileIn, dryRun)); @@ -763,7 +781,7 @@ public Response extractNcml(@Context ContainerRequestContext crc, @PathParam("id if (!au.isSuperuser()) { // We can always make a command in the future if there's a need // for non-superusers to call this API. - return error(Response.Status.FORBIDDEN, "This API call can be used by superusers only"); + return error(FORBIDDEN, "This API call can be used by superusers only"); } DataFile dataFileIn = findDataFileOrDie(id); java.nio.file.Path tempLocationPath = null; @@ -814,19 +832,17 @@ public Response getExternalToolFMParams(@Context ContainerRequestContext crc, @P return error(BAD_REQUEST, "External tool does not have file scope."); } ApiToken apiToken = null; - User u = getRequestUser(crc); - if (u instanceof AuthenticatedUser) { - apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) u); - } + User user = getRequestUser(crc); + apiToken = authSvc.getValidApiTokenForUser(user); FileMetadata target = fileSvc.findFileMetadata(fmid); if (target == null) { return error(BAD_REQUEST, "FileMetadata not found."); } - ExternalToolHandler eth = null; + URLTokenUtil eth = null; eth = new ExternalToolHandler(externalTool, target.getDataFile(), apiToken, target, locale); - return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())))); + return ok(eth.createPostBody(eth.getParams(JsonUtil.getJsonObject(externalTool.getToolParameters())), JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()))); } @GET @@ -931,4 +947,37 @@ public Response getHasBeenDeleted(@Context ContainerRequestContext crc, @PathPar return ok(dataFileServiceBean.hasBeenDeleted(dataFile)); }, getRequestUser(crc)); } + + /** + * @param fileIdOrPersistentId Database ID or PID of the data file. + * @param versionNumber The version of the dataset, such as 1.0, :draft, + * :latest-published, etc. + * @param includeDeaccessioned Defaults to false. 
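+ * Example (hypothetical identifiers, shown for illustration):
+ * GET /api/files/42/versions/1.0/citation
+ * GET /api/files/:persistentId/versions/:latest-published/citation?persistentId=doi:10.5072/FK2/ABC123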
+ */ + @GET + @AuthRequired + @Path("{id}/versions/{dsVersionString}/citation") + public Response getFileCitationByVersion(@Context ContainerRequestContext crc, @PathParam("id") String fileIdOrPersistentId, @PathParam("dsVersionString") String versionNumber, @QueryParam("includeDeaccessioned") boolean includeDeaccessioned) { + try { + DataverseRequest req = createDataverseRequest(getRequestUser(crc)); + final DataFile df = execCommand(new GetDataFileCommand(req, findDataFileOrDie(fileIdOrPersistentId))); + Dataset ds = df.getOwner(); + DatasetVersion dsv = findDatasetVersionOrDie(req, versionNumber, ds, includeDeaccessioned, true); + if (dsv == null) { + return unauthorized(BundleUtil.getStringFromBundle("files.api.no.draftOrUnauth")); + } + + Long datasetVersionId = dsv.getId(); + FileMetadata fm = dataFileServiceBean.findFileMetadataByDatasetVersionIdAndDataFileId(datasetVersionId, df.getId()); + if (fm == null) { + return notFound(BundleUtil.getStringFromBundle("files.api.fileNotFound")); + } + boolean direct = df.isIdentifierRegistered(); + DataCitation citation = new DataCitation(fm, direct); + return ok(citation.toString(true)); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Groups.java b/src/main/java/edu/harvard/iq/dataverse/api/Groups.java index d56a787c7ff..ed996b8ecf9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Groups.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Groups.java @@ -88,8 +88,8 @@ public Response postIpGroup( JsonObject dto ){ * that group from being created. */ @PUT - @Path("ip/{groupName}") - public Response putIpGroups( @PathParam("groupName") String groupName, JsonObject dto ){ + @Path("ip/{group}") + public Response putIpGroups( @PathParam("group") String groupName, JsonObject dto ){ try { if ( groupName == null || groupName.trim().isEmpty() ) { return badRequest("Group name cannot be empty"); @@ -118,8 +118,8 @@ public Response listIpGroups() { } @GET - @Path("ip/{groupIdtf}") - public Response getIpGroup( @PathParam("groupIdtf") String groupIdtf ) { + @Path("ip/{group}") + public Response getIpGroup( @PathParam("group") String groupIdtf ) { IpGroup grp; if ( isNumeric(groupIdtf) ) { grp = ipGroupPrv.get( Long.parseLong(groupIdtf) ); @@ -131,8 +131,8 @@ public Response getIpGroup( @PathParam("groupIdtf") String groupIdtf ) { } @DELETE - @Path("ip/{groupIdtf}") - public Response deleteIpGroup( @PathParam("groupIdtf") String groupIdtf ) { + @Path("ip/{group}") + public Response deleteIpGroup( @PathParam("group") String groupIdtf ) { IpGroup grp; if ( isNumeric(groupIdtf) ) { grp = ipGroupPrv.get( Long.parseLong(groupIdtf) ); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Index.java b/src/main/java/edu/harvard/iq/dataverse/api/Index.java index 4910c460b6a..c30a77acb58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Index.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Index.java @@ -215,7 +215,7 @@ public Response clearSolrIndex() { return error(Status.INTERNAL_SERVER_ERROR, ex.getLocalizedMessage()); } } - + @GET @Path("{type}/{id}") public Response indexTypeById(@PathParam("type") String type, @PathParam("id") Long id) { @@ -326,6 +326,29 @@ public Response indexDatasetByPersistentId(@QueryParam("persistentId") String pe } } + /** + * Clears the entry for a dataset from Solr + * + * @param id numeric id of the dataset + * @return response; + * will return 404 if no such dataset in the database; but will attempt to + * clear the entry
from Solr regardless. + */ + @DELETE + @Path("datasets/{id}") + public Response clearDatasetFromIndex(@PathParam("id") Long id) { + Dataset dataset = datasetService.find(id); + // We'll attempt to delete the Solr document regardless of whether the + // dataset exists in the database: + String response = indexService.removeSolrDocFromIndex(IndexServiceBean.solrDocIdentifierDataset + id); + if (dataset != null) { + return ok("Sent request to clear Solr document for dataset " + id + ": " + response); + } else { + return notFound("Could not find dataset " + id + " in the database. Requested to clear from Solr anyway: " + response); + } + } + + /** * This is just a demo of the modular math logic we use for indexAll. */ diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Info.java b/src/main/java/edu/harvard/iq/dataverse/api/Info.java index 0652539b595..257519677d3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Info.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Info.java @@ -1,19 +1,35 @@ package edu.harvard.iq.dataverse.api; -import edu.harvard.iq.dataverse.api.auth.AuthRequired; +import java.io.FileInputStream; +import java.io.InputStream; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +import jakarta.ws.rs.Produces; +import org.apache.commons.io.IOUtils; + import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import jakarta.ejb.EJB; import jakarta.json.Json; import jakarta.json.JsonValue; import jakarta.ws.rs.GET; import jakarta.ws.rs.Path; -import jakarta.ws.rs.container.ContainerRequestContext; -import jakarta.ws.rs.core.Context; +import jakarta.ws.rs.PathParam; +import jakarta.ws.rs.core.MediaType; import jakarta.ws.rs.core.Response; +import org.eclipse.microprofile.openapi.annotations.Operation; +import org.eclipse.microprofile.openapi.annotations.responses.APIResponse; +import org.eclipse.microprofile.openapi.annotations.tags.Tag; @Path("info") +@Tag(name = "info", description = "General information about the Dataverse installation.") public class Info extends AbstractApiBean { @EJB @@ -22,6 +38,8 @@ public class Info extends AbstractApiBean { @EJB SystemConfig systemConfig; + private static final Logger logger = Logger.getLogger(Info.class.getCanonicalName()); + @GET @Path("settings/:DatasetPublishPopupCustomText") public Response getDatasetPublishPopupCustomText() { @@ -35,30 +53,30 @@ public Response getMaxEmbargoDurationInMonths() { } @GET - @AuthRequired @Path("version") - public Response getInfo(@Context ContainerRequestContext crc) { + @Operation(summary = "Get version and build information", description = "Get version and build information") + @APIResponse(responseCode = "200", + description = "Version and build information") + public Response getInfo() { String versionStr = systemConfig.getVersion(true); String[] comps = versionStr.split("build",2); String version = comps[0].trim(); JsonValue build = comps.length > 1 ? 
Json.createArrayBuilder().add(comps[1].trim()).build().get(0) : JsonValue.NULL; - - return response( req -> ok( Json.createObjectBuilder().add("version", version) - .add("build", build)), getRequestUser(crc)); + return ok(Json.createObjectBuilder() + .add("version", version) + .add("build", build)); } @GET - @AuthRequired @Path("server") - public Response getServer(@Context ContainerRequestContext crc) { - return response( req -> ok(JvmSettings.FQDN.lookup()), getRequestUser(crc)); + public Response getServer() { + return ok(JvmSettings.FQDN.lookup()); } @GET - @AuthRequired @Path("apiTermsOfUse") - public Response getTermsOfUse(@Context ContainerRequestContext crc) { - return response( req -> ok(systemConfig.getApiTermsOfUse()), getRequestUser(crc)); + public Response getTermsOfUse() { + return ok(systemConfig.getApiTermsOfUse()); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/api/LDNInbox.java b/src/main/java/edu/harvard/iq/dataverse/api/LDNInbox.java index 05d12f1083c..6a9c608dc13 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/LDNInbox.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/LDNInbox.java @@ -1,12 +1,9 @@ package edu.harvard.iq.dataverse.api; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.DataverseRoleServiceBean; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; -import edu.harvard.iq.dataverse.HandlenetServiceBean; import edu.harvard.iq.dataverse.MailServiceBean; import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; import edu.harvard.iq.dataverse.RoleAssignment; @@ -15,6 +12,9 @@ import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonLDNamespace; @@ -134,13 +134,13 @@ public Response acceptMessage(String body) { .getString("@id"); if (citedResource.getString("@type").equals(JsonLDTerm.schemaOrg("Dataset").getUrl())) { logger.fine("Raw PID: " + pid); - if (pid.startsWith(DOIServiceBean.DOI_RESOLVER_URL)) { - pid = pid.replace(DOIServiceBean.DOI_RESOLVER_URL, DOIServiceBean.DOI_PROTOCOL + ":"); - } else if (pid.startsWith(HandlenetServiceBean.HDL_RESOLVER_URL)) { - pid = pid.replace(HandlenetServiceBean.HDL_RESOLVER_URL, HandlenetServiceBean.HDL_PROTOCOL + ":"); + if (pid.startsWith(AbstractDOIProvider.DOI_RESOLVER_URL)) { + pid = pid.replace(AbstractDOIProvider.DOI_RESOLVER_URL, AbstractDOIProvider.DOI_PROTOCOL + ":"); + } else if (pid.startsWith(HandlePidProvider.HDL_RESOLVER_URL)) { + pid = pid.replace(HandlePidProvider.HDL_RESOLVER_URL, HandlePidProvider.HDL_PROTOCOL + ":"); } logger.fine("Protocol PID: " + pid); - Optional id = GlobalIdServiceBean.parse(pid); + Optional id = PidProvider.parse(pid); Dataset dataset = datasetSvc.findByGlobalId(pid); if (dataset != null) { JsonObject citingResource = Json.createObjectBuilder().add("@id", citingPID) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java 
b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java index 6b48dbf8415..1f2f1039327 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java @@ -2,17 +2,23 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; +import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitations; import edu.harvard.iq.dataverse.makedatacount.DatasetExternalCitationsServiceBean; import edu.harvard.iq.dataverse.makedatacount.DatasetMetrics; import edu.harvard.iq.dataverse.makedatacount.DatasetMetricsServiceBean; +import edu.harvard.iq.dataverse.makedatacount.MakeDataCountProcessState; +import edu.harvard.iq.dataverse.makedatacount.MakeDataCountProcessStateServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DataCiteDOIProvider; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; -import java.io.FileReader; import java.io.IOException; +import java.io.InputStream; import java.net.HttpURLConnection; -import java.net.MalformedURLException; import java.net.URL; import java.util.Iterator; import java.util.List; @@ -25,6 +31,8 @@ import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonValue; +import jakarta.ws.rs.DELETE; +import jakarta.ws.rs.GET; import jakarta.ws.rs.POST; import jakarta.ws.rs.Path; import jakarta.ws.rs.PathParam; @@ -43,6 +51,8 @@ public class MakeDataCountApi extends AbstractApiBean { @EJB DatasetMetricsServiceBean datasetMetricsService; @EJB + MakeDataCountProcessStateServiceBean makeDataCountProcessStateService; + @EJB DatasetExternalCitationsServiceBean datasetExternalCitationsService; @EJB DatasetServiceBean datasetService; @@ -83,26 +93,21 @@ public Response sendDataToHub() { @Path("{id}/addUsageMetricsFromSushiReport") public Response addUsageMetricsFromSushiReport(@PathParam("id") String id, @QueryParam("reportOnDisk") String reportOnDisk) { - JsonObject report; - - try (FileReader reader = new FileReader(reportOnDisk)) { - report = Json.createReader(reader).readObject(); - Dataset dataset; - try { - dataset = findDatasetOrDie(id); - List datasetMetrics = datasetMetricsService.parseSushiReport(report, dataset); - if (!datasetMetrics.isEmpty()) { - for (DatasetMetrics dm : datasetMetrics) { - datasetMetricsService.save(dm); - } + try { + JsonObject report = JsonUtil.getJsonObjectFromFile(reportOnDisk); + Dataset dataset = findDatasetOrDie(id); + List datasetMetrics = datasetMetricsService.parseSushiReport(report, dataset); + if (!datasetMetrics.isEmpty()) { + for (DatasetMetrics dm : datasetMetrics) { + datasetMetricsService.save(dm); } - } catch (WrappedResponse ex) { - Logger.getLogger(MakeDataCountApi.class.getName()).log(Level.SEVERE, null, ex); - return error(Status.BAD_REQUEST, "Wrapped response: " + ex.getLocalizedMessage()); } + } catch (WrappedResponse ex) { + logger.log(Level.SEVERE, null, ex); + return error(Status.BAD_REQUEST, "Wrapped response: " + ex.getLocalizedMessage()); } catch (IOException ex) { - System.out.print(ex.getMessage()); + logger.log(Level.WARNING, ex.getMessage()); return error(Status.BAD_REQUEST, "IOException: " + ex.getLocalizedMessage()); } String msg = "Dummy Data has been added to dataset " + 
id; @@ -111,12 +116,10 @@ public Response addUsageMetricsFromSushiReport(@PathParam("id") String id, @Quer @POST @Path("/addUsageMetricsFromSushiReport") - public Response addUsageMetricsFromSushiReportAll(@PathParam("id") String id, @QueryParam("reportOnDisk") String reportOnDisk) { - - JsonObject report; + public Response addUsageMetricsFromSushiReportAll(@QueryParam("reportOnDisk") String reportOnDisk) { - try (FileReader reader = new FileReader(reportOnDisk)) { - report = Json.createReader(reader).readObject(); + try { + JsonObject report = JsonUtil.getJsonObjectFromFile(reportOnDisk); List datasetMetrics = datasetMetricsService.parseSushiReport(report, null); if (!datasetMetrics.isEmpty()) { @@ -126,7 +129,7 @@ public Response addUsageMetricsFromSushiReportAll(@PathParam("id") String id, @Q } } catch (IOException ex) { - System.out.print(ex.getMessage()); + logger.log(Level.WARNING, ex.getMessage()); return error(Status.BAD_REQUEST, "IOException: " + ex.getLocalizedMessage()); } String msg = "Usage Metrics Data has been added to all datasets from file " + reportOnDisk; @@ -135,11 +138,17 @@ public Response addUsageMetricsFromSushiReportAll(@PathParam("id") String id, @Q @POST @Path("{id}/updateCitationsForDataset") - public Response updateCitationsForDataset(@PathParam("id") String id) throws MalformedURLException, IOException { + public Response updateCitationsForDataset(@PathParam("id") String id) throws IOException { try { Dataset dataset = findDatasetOrDie(id); - String persistentId = dataset.getGlobalId().toString(); - //ToDo - if this isn't a DOI? + GlobalId pid = dataset.getGlobalId(); + PidProvider pidProvider = PidUtil.getPidProvider(pid.getProviderId()); + // Only supported for DOIs and for DataCite DOI providers + if(!DataCiteDOIProvider.TYPE.equals(pidProvider.getProviderType())) { + return error(Status.BAD_REQUEST, "Only DataCite DOI providers are supported"); + } + String persistentId = pid.toString(); + // DataCite wants "doi=", not "doi:". 
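+ // e.g. "doi:10.5072/FK2/ABC123" -> "10.5072/FK2/ABC123" (illustrative PID only)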
String authorityPlusIdentifier = persistentId.replaceFirst("doi:", ""); // Request max page size and then loop to handle multiple pages @@ -158,7 +167,10 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws Mal logger.warning("Failed to get citations from " + url.toString()); return error(Status.fromStatusCode(status), "Failed to get citations from " + url.toString()); } - JsonObject report = Json.createReader(connection.getInputStream()).readObject(); + JsonObject report; + try (InputStream inStream = connection.getInputStream()) { + report = JsonUtil.getJsonObject(inStream); + } JsonObject links = report.getJsonObject("links"); JsonArray data = report.getJsonArray("data"); Iterator iter = data.iterator(); @@ -194,5 +206,51 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws Mal return wr.getResponse(); } } + @GET + @Path("{yearMonth}/processingState") + public Response getProcessingState(@PathParam("yearMonth") String yearMonth) { + MakeDataCountProcessState mdcps; + try { + mdcps = makeDataCountProcessStateService.getMakeDataCountProcessState(yearMonth); + } catch (IllegalArgumentException e) { + return error(Status.BAD_REQUEST,e.getMessage()); + } + if (mdcps != null) { + JsonObjectBuilder output = Json.createObjectBuilder(); + output.add("yearMonth", mdcps.getYearMonth()); + output.add("state", mdcps.getState().name()); + output.add("stateChangeTimestamp", mdcps.getStateChangeTime().toString()); + return ok(output); + } else { + return error(Status.NOT_FOUND, "Could not find an existing process state for " + yearMonth); + } + } + + @POST + @Path("{yearMonth}/processingState") + public Response updateProcessingState(@PathParam("yearMonth") String yearMonth, @QueryParam("state") String state) { + MakeDataCountProcessState mdcps; + try { + mdcps = makeDataCountProcessStateService.setMakeDataCountProcessState(yearMonth, state); + } catch (Exception e) { + return badRequest(e.getMessage()); + } + + JsonObjectBuilder output = Json.createObjectBuilder(); + output.add("yearMonth", mdcps.getYearMonth()); + output.add("state", mdcps.getState().name()); + output.add("stateChangeTimestamp", mdcps.getStateChangeTime().toString()); + return ok(output); + } + @DELETE + @Path("{yearMonth}/processingState") + public Response deleteProcessingState(@PathParam("yearMonth") String yearMonth) { + boolean deleted = makeDataCountProcessStateService.deleteMakeDataCountProcessState(yearMonth); + if (deleted) { + return ok("Processing State deleted for " + yearMonth); + } else { + return notFound("Processing State not found for " + yearMonth); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/MetadataBlocks.java b/src/main/java/edu/harvard/iq/dataverse/api/MetadataBlocks.java index 448fb48e389..8861abd4803 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/MetadataBlocks.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/MetadataBlocks.java @@ -1,34 +1,33 @@ package edu.harvard.iq.dataverse.api; import edu.harvard.iq.dataverse.MetadataBlock; -import jakarta.ws.rs.GET; -import jakarta.ws.rs.Path; -import jakarta.ws.rs.Produces; +import jakarta.ws.rs.*; import jakarta.ws.rs.core.Response; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.brief; -import jakarta.ws.rs.PathParam; + +import java.util.List; + import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; /** * Api bean for managing metadata blocks. 
+ * * @author michael */ @Path("metadatablocks") @Produces("application/json") public class MetadataBlocks extends AbstractApiBean { - + @GET - public Response list() { - return ok(metadataBlockSvc.listMetadataBlocks().stream().map(brief::json).collect(toJsonArray())); + public Response listMetadataBlocks(@QueryParam("onlyDisplayedOnCreate") boolean onlyDisplayedOnCreate, + @QueryParam("returnDatasetFieldTypes") boolean returnDatasetFieldTypes) { + List<MetadataBlock> metadataBlocks = metadataBlockSvc.listMetadataBlocks(onlyDisplayedOnCreate); + return ok(json(metadataBlocks, returnDatasetFieldTypes, onlyDisplayedOnCreate)); } - + @Path("{identifier}") @GET - public Response getBlock( @PathParam("identifier") String idtf ) { + public Response getMetadataBlock(@PathParam("identifier") String idtf) { MetadataBlock b = findMetadataBlock(idtf); - - return (b != null ) ? ok(json(b)) : notFound("Can't find metadata block '" + idtf + "'"); + return (b != null) ? ok(json(b)) : notFound("Can't find metadata block '" + idtf + "'"); } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java b/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java index 7bb2570334b..452e5df9f9a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java @@ -547,6 +547,98 @@ public Response getDownloadsPastDays(@Context UriInfo uriInfo, @PathParam("days" return ok(jsonObj); } + /** Accounts */ + + @GET + @Path("accounts") + public Response getAccountsAllTime(@Context UriInfo uriInfo) { + return getAccountsToMonth(uriInfo, MetricsUtil.getCurrentMonth()); + } + + @GET + @Path("accounts/toMonth/{yyyymm}") + public Response getAccountsToMonth(@Context UriInfo uriInfo, @PathParam("yyyymm") String yyyymm) { + + try { + errorIfUnrecongizedQueryParamPassed(uriInfo, new String[] { }); + } catch (IllegalArgumentException ia) { + return error(BAD_REQUEST, ia.getLocalizedMessage()); + } + + String metricName = "accountsToMonth"; + String sanitizedyyyymm = MetricsUtil.sanitizeYearMonthUserInput(yyyymm); + JsonObject jsonObj = MetricsUtil.stringToJsonObject(metricsSvc.returnUnexpiredCacheMonthly(metricName, sanitizedyyyymm, null, null)); + + if (null == jsonObj) { // run query and save + Long count; + try { + count = metricsSvc.accountsToMonth(sanitizedyyyymm); + } catch (ParseException e) { + return error(BAD_REQUEST, "Unable to parse supplied date: " + e.getLocalizedMessage()); + } + jsonObj = MetricsUtil.countToJson(count).build(); + metricsSvc.save(new Metric(metricName, sanitizedyyyymm, null, null, jsonObj.toString())); + } + + return ok(jsonObj); + } + + @GET + @Path("accounts/pastDays/{days}") + public Response getAccountsPastDays(@Context UriInfo uriInfo, @PathParam("days") int days) { + + try { + errorIfUnrecongizedQueryParamPassed(uriInfo, new String[] { }); + } catch (IllegalArgumentException ia) { + return error(BAD_REQUEST, ia.getLocalizedMessage()); + } + + String metricName = "accountsPastDays"; + + if (days < 1) { + return error(BAD_REQUEST, "Invalid parameter for number of days."); + } + + JsonObject jsonObj = MetricsUtil.stringToJsonObject(metricsSvc.returnUnexpiredCacheDayBased(metricName, String.valueOf(days), null, null)); + + if (null == jsonObj) { // run query and save + Long count = metricsSvc.accountsPastDays(days); + jsonObj = MetricsUtil.countToJson(count).build(); + metricsSvc.save(new Metric(metricName, String.valueOf(days), null, null, jsonObj.toString())); + } + + return ok(jsonObj); + } + + @GET + @Path("accounts/monthly") + 
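// Account counts by month; returns CSV by default or JSON via content negotiation (see getVariant below). +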
@Produces("text/csv, application/json") + public Response getAccountsTimeSeries(@Context Request req, @Context UriInfo uriInfo) { + + try { + errorIfUnrecongizedQueryParamPassed(uriInfo, new String[] { }); + } catch (IllegalArgumentException ia) { + return error(BAD_REQUEST, ia.getLocalizedMessage()); + } + + String metricName = "accounts"; + JsonArray jsonArray = MetricsUtil.stringToJsonArray(metricsSvc.returnUnexpiredCacheAllTime(metricName, null, null)); + + if (null == jsonArray) { // run query and save + // Only handling published right now + jsonArray = metricsSvc.accountsTimeSeries(); + metricsSvc.save(new Metric(metricName, null, null, null, jsonArray.toString())); + } + + MediaType requestedType = getVariant(req, MediaType.valueOf(FileUtil.MIME_TYPE_CSV), MediaType.APPLICATION_JSON_TYPE); + if ((requestedType != null) && (requestedType.equals(MediaType.APPLICATION_JSON_TYPE))) { + return ok(jsonArray); + } + return ok(FileUtil.jsonArrayOfObjectsToCSV(jsonArray, MetricsUtil.DATE, MetricsUtil.COUNT), MediaType.valueOf(FileUtil.MIME_TYPE_CSV), "accounts.timeseries.csv"); + } + + /** MakeDataCount */ + @GET @Path("makeDataCount/{metric}") public Response getMakeDataCountMetricCurrentMonth(@Context UriInfo uriInfo, @PathParam("metric") String metricSupplied, @QueryParam("country") String country, @QueryParam("parentAlias") String parentAlias) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Notifications.java b/src/main/java/edu/harvard/iq/dataverse/api/Notifications.java index 37c894d3071..df172f36973 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Notifications.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Notifications.java @@ -55,7 +55,6 @@ public Response getAllNotificationsForUser(@Context ContainerRequestContext crc) notificationObjectBuilder.add("id", notification.getId()); notificationObjectBuilder.add("type", type.toString()); /* FIXME - Re-add reasons for return if/when they are added to the notifications page. 
- if (Type.RETURNEDDS.equals(type) || Type.SUBMITTEDDS.equals(type)) { JsonArrayBuilder reasons = getReasonsForReturn(notification); for (JsonValue reason : reasons.build()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Pids.java b/src/main/java/edu/harvard/iq/dataverse/api/Pids.java index 534e42fd505..4ad57bceb58 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Pids.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Pids.java @@ -130,4 +130,41 @@ public Response deletePid(@Context ContainerRequestContext crc, @PathParam("id") } } + @GET + @AuthRequired + @Path("providers") + @Produces(MediaType.APPLICATION_JSON) + public Response getPidProviders(@Context ContainerRequestContext crc) throws WrappedResponse { + try { + getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + return ok(PidUtil.getProviders()); + } + + @GET + @AuthRequired + // The :.+ suffix allows PIDs with a / char to be entered w/o escaping + @Path("providers/{persistentId:.+}") + @Produces(MediaType.APPLICATION_JSON) + public Response getPidProviderId(@Context ContainerRequestContext crc, @PathParam("persistentId") String persistentId) throws WrappedResponse { + try { + getRequestAuthenticatedUserOrDie(crc); + } catch (WrappedResponse ex) { + return ex.getResponse(); + } + GlobalId globalId = PidUtil.parseAsGlobalID(persistentId); + if(globalId== null) { + return error(Response.Status.NOT_FOUND, "No provider found for PID"); + } else { + String providerId = globalId.getProviderId(); + if(PidUtil.getManagedProviderIds().contains(providerId)) { + return ok(globalId.getProviderId()); + } else { + return ok("PID recognized as an unmanaged " + globalId.getProtocol()); + } + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Search.java b/src/main/java/edu/harvard/iq/dataverse/api/Search.java index c760534ca7b..6b9fcb38305 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Search.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Search.java @@ -1,10 +1,8 @@ package edu.harvard.iq.dataverse.api; -import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.api.auth.AuthRequired; import edu.harvard.iq.dataverse.search.SearchFields; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.DvObjectServiceBean; import edu.harvard.iq.dataverse.search.FacetCategory; import edu.harvard.iq.dataverse.search.FacetLabel; import edu.harvard.iq.dataverse.search.SolrSearchResult; @@ -16,7 +14,6 @@ import edu.harvard.iq.dataverse.search.SearchConstants; import edu.harvard.iq.dataverse.search.SearchException; import edu.harvard.iq.dataverse.search.SearchUtil; -import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.search.SortBy; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import java.io.IOException; @@ -26,6 +23,7 @@ import java.util.Map; import java.util.logging.Logger; import jakarta.ejb.EJB; +import jakarta.inject.Inject; import jakarta.json.Json; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObjectBuilder; @@ -51,10 +49,8 @@ public class Search extends AbstractApiBean { SearchServiceBean searchService; @EJB DataverseServiceBean dataverseService; - @EJB - DvObjectServiceBean dvObjectService; - @EJB - SolrIndexServiceBean SolrIndexService; + @Inject + DatasetVersionFilesServiceBean datasetVersionFilesServiceBean; @GET @AuthRequired @@ -157,7 +153,9 @@ public Response search( numResultsPerPage, true, 
//SEK get query entities always for search API additional Dataset Information 6300 12/6/2019 geoPoint, - geoRadius + geoRadius, + showFacets, // facets are expensive, no need to ask for them if not requested + showRelevance // no need for highlights unless requested either ); } catch (SearchException ex) { Throwable cause = ex; @@ -177,7 +175,7 @@ public Response search( JsonArrayBuilder itemsArrayBuilder = Json.createArrayBuilder(); List<SolrSearchResult> solrSearchResults = solrQueryResponse.getSolrSearchResults(); for (SolrSearchResult solrSearchResult : solrSearchResults) { - itemsArrayBuilder.add(solrSearchResult.toJsonObject(showRelevance, showEntityIds, showApiUrls, metadataFields)); + itemsArrayBuilder.add(solrSearchResult.json(showRelevance, showEntityIds, showApiUrls, metadataFields, getDatasetFileCount(solrSearchResult))); } JsonObjectBuilder spelling_alternatives = Json.createObjectBuilder(); @@ -185,31 +183,32 @@ public Response search( spelling_alternatives.add(entry.getKey(), entry.getValue().toString()); } - JsonArrayBuilder facets = Json.createArrayBuilder(); - JsonObjectBuilder facetCategoryBuilder = Json.createObjectBuilder(); - for (FacetCategory facetCategory : solrQueryResponse.getFacetCategoryList()) { - JsonObjectBuilder facetCategoryBuilderFriendlyPlusData = Json.createObjectBuilder(); - JsonArrayBuilder facetLabelBuilderData = Json.createArrayBuilder(); - for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { - JsonObjectBuilder countBuilder = Json.createObjectBuilder(); - countBuilder.add(facetLabel.getName(), facetLabel.getCount()); - facetLabelBuilderData.add(countBuilder); - } - facetCategoryBuilderFriendlyPlusData.add("friendly", facetCategory.getFriendlyName()); - facetCategoryBuilderFriendlyPlusData.add("labels", facetLabelBuilderData); - facetCategoryBuilder.add(facetCategory.getName(), facetCategoryBuilderFriendlyPlusData); - } - facets.add(facetCategoryBuilder); - JsonObjectBuilder value = Json.createObjectBuilder() .add("q", query) .add("total_count", solrQueryResponse.getNumResultsFound()) .add("start", solrQueryResponse.getResultsStart()) .add("spelling_alternatives", spelling_alternatives) .add("items", itemsArrayBuilder.build()); + if (showFacets) { + JsonArrayBuilder facets = Json.createArrayBuilder(); + JsonObjectBuilder facetCategoryBuilder = Json.createObjectBuilder(); + for (FacetCategory facetCategory : solrQueryResponse.getFacetCategoryList()) { + JsonObjectBuilder facetCategoryBuilderFriendlyPlusData = Json.createObjectBuilder(); + JsonArrayBuilder facetLabelBuilderData = Json.createArrayBuilder(); + for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + JsonObjectBuilder countBuilder = Json.createObjectBuilder(); + countBuilder.add(facetLabel.getName(), facetLabel.getCount()); + facetLabelBuilderData.add(countBuilder); + } + facetCategoryBuilderFriendlyPlusData.add("friendly", facetCategory.getFriendlyName()); + facetCategoryBuilderFriendlyPlusData.add("labels", facetLabelBuilderData); + facetCategoryBuilder.add(facetCategory.getName(), facetCategoryBuilderFriendlyPlusData); + } + facets.add(facetCategoryBuilder); value.add("facets", facets); } + value.add("count_in_response", solrSearchResults.size()); /** * @todo Returning the fq might be useful as a troubleshooting aid @@ -230,6 +229,15 @@ public Response search( } } + private Long getDatasetFileCount(SolrSearchResult solrSearchResult) { + DvObject dvObject = solrSearchResult.getEntity(); + if (dvObject.isInstanceofDataset()) { + DatasetVersion datasetVersion = ((Dataset) 
dvObject).getVersionFromId(solrSearchResult.getDatasetVersionId()); + return datasetVersionFilesServiceBean.getFileMetadataCount(datasetVersion); + } + return null; + } + private User getUser(ContainerRequestContext crc) throws WrappedResponse { User userToExecuteSearchAs = GuestUser.get(); try { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java index 87be1f14e05..46747b50c29 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/TestApi.java @@ -21,7 +21,7 @@ public class TestApi extends AbstractApiBean { @GET @Path("datasets/{id}/externalTools") - public Response getExternalToolsforFile(@PathParam("id") String idSupplied, @QueryParam("type") String typeSupplied) { + public Response getDatasetExternalToolsforFile(@PathParam("id") String idSupplied, @QueryParam("type") String typeSupplied) { ExternalTool.Type type; try { type = ExternalTool.Type.fromString(typeSupplied); @@ -44,6 +44,34 @@ public Response getExternalToolsforFile(@PathParam("id") String idSupplied, @Que return wr.getResponse(); } } + + @GET + @Path("datasets/{id}/externalTool/{toolId}") + public Response getExternalToolforDatasetById(@PathParam("id") String idSupplied, @PathParam("toolId") String toolId, @QueryParam("type") String typeSupplied) { + ExternalTool.Type type; + try { + type = ExternalTool.Type.fromString(typeSupplied); + } catch (IllegalArgumentException ex) { + return error(BAD_REQUEST, ex.getLocalizedMessage()); + } + Dataset dataset; + try { + dataset = findDatasetOrDie(idSupplied); + JsonArrayBuilder tools = Json.createArrayBuilder(); + List<ExternalTool> datasetTools = externalToolService.findDatasetToolsByType(type); + for (ExternalTool tool : datasetTools) { + ApiToken apiToken = externalToolService.getApiToken(getRequestApiKey()); + ExternalToolHandler externalToolHandler = new ExternalToolHandler(tool, dataset, apiToken, null); + JsonObjectBuilder toolToJson = externalToolService.getToolAsJsonWithQueryParameters(externalToolHandler); + if (tool.getId().toString().equals(toolId)) { + return ok(toolToJson); + } + } + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + return error(BAD_REQUEST, "Could not find external tool with id of " + toolId); + } @Path("files/{id}/externalTools") @GET @@ -71,5 +99,31 @@ public Response getExternalToolsForFile(@PathParam("id") String idSupplied, @Que return wr.getResponse(); } } + + @Path("files/{id}/externalTool/{toolId}") + @GET + public Response getExternalToolForFileById(@PathParam("id") String idSupplied, @QueryParam("type") String typeSupplied, @PathParam("toolId") String toolId) { + ExternalTool.Type type; + try { + type = ExternalTool.Type.fromString(typeSupplied); + } catch (IllegalArgumentException ex) { + return error(BAD_REQUEST, ex.getLocalizedMessage()); + } + try { + DataFile dataFile = findDataFileOrDie(idSupplied); + List<ExternalTool> datasetTools = externalToolService.findFileToolsByTypeAndContentType(type, dataFile.getContentType()); + for (ExternalTool tool : datasetTools) { + ApiToken apiToken = externalToolService.getApiToken(getRequestApiKey()); + ExternalToolHandler externalToolHandler = new ExternalToolHandler(tool, dataFile, apiToken, dataFile.getFileMetadata(), null); + JsonObjectBuilder toolToJson = externalToolService.getToolAsJsonWithQueryParameters(externalToolHandler); + if (externalToolService.meetsRequirements(tool, dataFile) && tool.getId().toString().equals(toolId)) { + return ok(toolToJson); + } + } + return 
error(BAD_REQUEST, "Could not find external tool with id of " + toolId); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java b/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java index 05ba150df8e..add43ea2091 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/TestIngest.java @@ -100,7 +100,7 @@ public String datafile(@QueryParam("fileName") String fileName, @QueryParam("fil TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { output = output.concat("Caught an exception trying to ingest file " + fileName + ": " + ingestEx.getLocalizedMessage()); return output; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Users.java b/src/main/java/edu/harvard/iq/dataverse/api/Users.java index 791fc7aa774..1f5430340c2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Users.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Users.java @@ -234,7 +234,7 @@ public Response getTraces(@Context ContainerRequestContext crc, @PathParam("iden @AuthRequired @Path("{identifier}/traces/{element}") @Produces("text/csv, application/json") - public Response getTraces(@Context ContainerRequestContext crc, @Context Request req, @PathParam("identifier") String identifier, @PathParam("element") String element) { + public Response getTracesElement(@Context ContainerRequestContext crc, @Context Request req, @PathParam("identifier") String identifier, @PathParam("element") String element) { try { AuthenticatedUser userToQuery = authSvc.getAuthenticatedUser(identifier); if(!elements.contains(element)) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/WorkflowsAdmin.java b/src/main/java/edu/harvard/iq/dataverse/api/WorkflowsAdmin.java index 8d5024c1c14..15478aacff7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/WorkflowsAdmin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/WorkflowsAdmin.java @@ -111,9 +111,9 @@ public Response deleteDefault(@PathParam("triggerType") String triggerType) { } } - @Path("/{identifier}") + @Path("/{id}") @GET - public Response getWorkflow(@PathParam("identifier") String identifier ) { + public Response getWorkflow(@PathParam("id") String identifier ) { try { long idtf = Long.parseLong(identifier); return workflows.getWorkflow(idtf) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanism.java b/src/main/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanism.java index f8572144236..258661f6495 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanism.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/auth/SignedUrlAuthMechanism.java @@ -3,7 +3,10 @@ import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; +import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.UrlSignerUtil; @@ -27,16 +30,18 @@ public class SignedUrlAuthMechanism implements AuthMechanism { 
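+ // A signed URL names the intended user and HTTP method; validation below recomputes the signature from the API signing secret plus that user's API token, so any tampering invalidates it.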
@Inject protected AuthenticationServiceBean authSvc; - + @Inject + protected PrivateUrlServiceBean privateUrlSvc; + @Override public User findUserFromRequest(ContainerRequestContext containerRequestContext) throws WrappedAuthErrorResponse { String signedUrlRequestParameter = getSignedUrlRequestParameter(containerRequestContext); if (signedUrlRequestParameter == null) { return null; } - AuthenticatedUser authUser = getAuthenticatedUserFromSignedUrl(containerRequestContext); - if (authUser != null) { - return authUser; + User user = getAuthenticatedUserFromSignedUrl(containerRequestContext); + if (user != null) { + return user; } throw new WrappedAuthErrorResponse(RESPONSE_MESSAGE_BAD_SIGNED_URL); } @@ -45,8 +50,8 @@ private String getSignedUrlRequestParameter(ContainerRequestContext containerReq return containerRequestContext.getUriInfo().getQueryParameters().getFirst(SIGNED_URL_TOKEN); } - private AuthenticatedUser getAuthenticatedUserFromSignedUrl(ContainerRequestContext containerRequestContext) { - AuthenticatedUser authUser = null; + private User getAuthenticatedUserFromSignedUrl(ContainerRequestContext containerRequestContext) { + User user = null; // The signedUrl contains a param telling which user this is supposed to be for. // We don't trust this. So we lookup that user, and get their API key, and use // that as a secret in validating the signedURL. If the signature can't be @@ -54,17 +59,26 @@ private AuthenticatedUser getAuthenticatedUserFromSignedUrl(ContainerRequestCont // we reject the request. UriInfo uriInfo = containerRequestContext.getUriInfo(); String userId = uriInfo.getQueryParameters().getFirst(SIGNED_URL_USER); - AuthenticatedUser targetUser = authSvc.getAuthenticatedUser(userId); - ApiToken userApiToken = authSvc.findApiTokenByUser(targetUser); + User targetUser = null; + ApiToken userApiToken = null; + if (!userId.startsWith(PrivateUrlUser.PREFIX)) { + targetUser = authSvc.getAuthenticatedUser(userId); + userApiToken = authSvc.findApiTokenByUser((AuthenticatedUser) targetUser); + } else { + PrivateUrl privateUrl = privateUrlSvc.getPrivateUrlFromDatasetId(Long.parseLong(userId.substring(PrivateUrlUser.PREFIX.length()))); + userApiToken = new ApiToken(); + userApiToken.setTokenString(privateUrl.getToken()); + targetUser = privateUrlSvc.getPrivateUrlUserFromToken(privateUrl.getToken()); + } if (targetUser != null && userApiToken != null) { String signedUrl = URLDecoder.decode(uriInfo.getRequestUri().toString(), StandardCharsets.UTF_8); String requestMethod = containerRequestContext.getMethod(); String signedUrlSigningKey = JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + userApiToken.getTokenString(); boolean isSignedUrlValid = UrlSignerUtil.isValidUrl(signedUrl, userId, requestMethod, signedUrlSigningKey); if (isSignedUrlValid) { - authUser = targetUser; + user = targetUser; } } - return authUser; + return user; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java index 5bc50903be8..a81848bd7af 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/CollectionDepositManagerImpl.java @@ -6,6 +6,7 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.DvObjectServiceBean; import 
edu.harvard.iq.dataverse.EjbDataverseEngine; import edu.harvard.iq.dataverse.PermissionServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -14,6 +15,7 @@ import edu.harvard.iq.dataverse.api.imports.ImportGenericServiceBean; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDatasetCommand; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.ConstraintViolationUtil; import java.util.logging.Level; @@ -44,6 +46,8 @@ public class CollectionDepositManagerImpl implements CollectionDepositManager { @EJB DatasetServiceBean datasetService; @EJB + DvObjectServiceBean dvObjectService; + @EJB PermissionServiceBean permissionService; @Inject SwordAuth swordAuth; @@ -96,13 +100,10 @@ public DepositReceipt createNew(String collectionUri, Deposit deposit, AuthCrede Dataset dataset = new Dataset(); dataset.setOwner(dvThatWillOwnDataset); - String nonNullDefaultIfKeyNotFound = ""; - String protocol = settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - String authority = settingsService.getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - - dataset.setProtocol(protocol); - dataset.setAuthority(authority); - //Wait until the create command before actually getting an identifier + PidProvider pidProvider = dvObjectService.getEffectivePidGenerator(dataset); + dataset.setProtocol(pidProvider.getProtocol()); + dataset.setAuthority(pidProvider.getAuthority()); + //Wait until the create command before actually getting an identifier logger.log(Level.FINE, "DS Deposit identifier: {0}", dataset.getIdentifier()); AbstractCreateDatasetCommand createDatasetCommand = new CreateNewDatasetCommand(dataset, dvReq); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java index a878720cc39..3f5345d8e0d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/datadeposit/MediaResourceManagerImpl.java @@ -11,7 +11,6 @@ import edu.harvard.iq.dataverse.PermissionServiceBean; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; -import edu.harvard.iq.dataverse.DataFileServiceBean.UserStorageQuota; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; @@ -19,6 +18,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.ConstraintViolationUtil; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -307,9 +307,9 @@ DepositReceipt replaceOrAddFiles(String uri, Deposit deposit, AuthCredentials au try { //CreateDataFileResult createDataFilesResponse = FileUtil.createDataFiles(editVersion, deposit.getInputStream(), uploadedZipFilename, guessContentTypeForMe, null, null, systemConfig); - UserStorageQuota quota 
= null; + UploadSessionQuotaLimit quota = null; if (systemConfig.isStorageQuotasEnforced()) { - quota = dataFileService.getUserStorageQuota(user, dataset); + quota = dataFileService.getUploadSessionQuotaLimit(dataset); } Command<CreateDataFileResult> cmd = new CreateNewDataFilesCommand(dvReq, editVersion, deposit.getInputStream(), uploadedZipFilename, guessContentTypeForMe, null, quota, null); CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index 73a83035fc5..85d4868605d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -828,9 +828,9 @@ private HashSet processGeoBndBox(XMLStreamReader xmlr) throws XMLStrea } else if (xmlr.getLocalName().equals("eastBL")) { addToSet(set,"eastLongitude", parseText(xmlr)); } else if (xmlr.getLocalName().equals("southBL")) { - addToSet(set,"southLongitude", parseText(xmlr)); + addToSet(set,"southLatitude", parseText(xmlr)); } else if (xmlr.getLocalName().equals("northBL")) { - addToSet(set,"northLongitude", parseText(xmlr)); + addToSet(set,"northLatitude", parseText(xmlr)); } } else if (event == XMLStreamConstants.END_ELEMENT) { if (xmlr.getLocalName().equals("geoBndBox")) break; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java index f7a6cf54dd5..6068ec45e4f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportGenericServiceBean.java @@ -2,7 +2,6 @@ import com.google.gson.Gson; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; import edu.harvard.iq.dataverse.DatasetFieldConstant; @@ -11,13 +10,14 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.ForeignMetadataFieldMapping; import edu.harvard.iq.dataverse.ForeignMetadataFormatMapping; -import edu.harvard.iq.dataverse.HandlenetServiceBean; import edu.harvard.iq.dataverse.MetadataBlockServiceBean; import edu.harvard.iq.dataverse.api.dto.*; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; import edu.harvard.iq.dataverse.license.LicenseServiceBean; -import edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.json.JsonParseException; @@ -352,7 +352,7 @@ private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { if (!otherIds.isEmpty()) { // We prefer doi or hdl identifiers like "doi:10.7910/DVN/1HE30F" for (String otherId : otherIds) { - if (otherId.startsWith(DOIServiceBean.DOI_PROTOCOL) || otherId.startsWith(HandlenetServiceBean.HDL_PROTOCOL) || otherId.startsWith(DOIServiceBean.DOI_RESOLVER_URL) || otherId.startsWith(HandlenetServiceBean.HDL_RESOLVER_URL) || otherId.startsWith(DOIServiceBean.HTTP_DOI_RESOLVER_URL) || 
otherId.startsWith(HandlenetServiceBean.HTTP_HDL_RESOLVER_URL) || otherId.startsWith(DOIServiceBean.DXDOI_RESOLVER_URL) || otherId.startsWith(DOIServiceBean.HTTP_DXDOI_RESOLVER_URL)) { + if (otherId.startsWith(AbstractDOIProvider.DOI_PROTOCOL) || otherId.startsWith(HandlePidProvider.HDL_PROTOCOL) || otherId.startsWith(AbstractDOIProvider.DOI_RESOLVER_URL) || otherId.startsWith(HandlePidProvider.HDL_RESOLVER_URL) || otherId.startsWith(AbstractDOIProvider.HTTP_DOI_RESOLVER_URL) || otherId.startsWith(HandlePidProvider.HTTP_HDL_RESOLVER_URL) || otherId.startsWith(AbstractDOIProvider.DXDOI_RESOLVER_URL) || otherId.startsWith(AbstractDOIProvider.HTTP_DXDOI_RESOLVER_URL)) { return otherId; } } @@ -361,7 +361,7 @@ private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) { try { HandleResolver hr = new HandleResolver(); hr.resolveHandle(otherId); - return HandlenetServiceBean.HDL_PROTOCOL + ":" + otherId; + return HandlePidProvider.HDL_PROTOCOL + ":" + otherId; } catch (HandleException e) { logger.fine("Not a valid handle: " + e.toString()); } @@ -388,7 +388,7 @@ public String reassignIdentifierAsGlobalId(String identifierString, DatasetDTO d String protocol = identifierString.substring(0, index1); - if (DOIServiceBean.DOI_PROTOCOL.equals(protocol) || HandlenetServiceBean.HDL_PROTOCOL.equals(protocol) || PermaLinkPidProviderServiceBean.PERMA_PROTOCOL.equals(protocol)) { + if (AbstractDOIProvider.DOI_PROTOCOL.equals(protocol) || HandlePidProvider.HDL_PROTOCOL.equals(protocol) || PermaLinkPidProvider.PERMA_PROTOCOL.equals(protocol)) { logger.fine("Processing hdl:- or doi:- or perma:-style identifier : "+identifierString); } else if ("http".equalsIgnoreCase(protocol) || "https".equalsIgnoreCase(protocol)) { @@ -396,21 +396,21 @@ public String reassignIdentifierAsGlobalId(String identifierString, DatasetDTO d // We also recognize global identifiers formatted as global resolver URLs: //ToDo - refactor index1 always has -1 here so that we can use index1+1 later //ToDo - single map of protocol/url, are all three cases the same then? - if (identifierString.startsWith(HandlenetServiceBean.HDL_RESOLVER_URL) || identifierString.startsWith(HandlenetServiceBean.HTTP_HDL_RESOLVER_URL)) { + if (identifierString.startsWith(HandlePidProvider.HDL_RESOLVER_URL) || identifierString.startsWith(HandlePidProvider.HTTP_HDL_RESOLVER_URL)) { logger.fine("Processing Handle identifier formatted as a resolver URL: "+identifierString); - protocol = HandlenetServiceBean.HDL_PROTOCOL; - index1 = (identifierString.startsWith(HandlenetServiceBean.HDL_RESOLVER_URL)) ? HandlenetServiceBean.HDL_RESOLVER_URL.length() - 1 : HandlenetServiceBean.HTTP_HDL_RESOLVER_URL.length() - 1; + protocol = HandlePidProvider.HDL_PROTOCOL; + index1 = (identifierString.startsWith(HandlePidProvider.HDL_RESOLVER_URL)) ? 
HandlePidProvider.HDL_RESOLVER_URL.length() - 1 : HandlePidProvider.HTTP_HDL_RESOLVER_URL.length() - 1; index2 = identifierString.indexOf("/", index1 + 1); - } else if (identifierString.startsWith(DOIServiceBean.DOI_RESOLVER_URL) || identifierString.startsWith(DOIServiceBean.HTTP_DOI_RESOLVER_URL) || identifierString.startsWith(DOIServiceBean.DXDOI_RESOLVER_URL) || identifierString.startsWith(DOIServiceBean.HTTP_DXDOI_RESOLVER_URL)) { + } else if (identifierString.startsWith(AbstractDOIProvider.DOI_RESOLVER_URL) || identifierString.startsWith(AbstractDOIProvider.HTTP_DOI_RESOLVER_URL) || identifierString.startsWith(AbstractDOIProvider.DXDOI_RESOLVER_URL) || identifierString.startsWith(AbstractDOIProvider.HTTP_DXDOI_RESOLVER_URL)) { logger.fine("Processing DOI identifier formatted as a resolver URL: "+identifierString); - protocol = DOIServiceBean.DOI_PROTOCOL; - identifierString = identifierString.replace(DOIServiceBean.DXDOI_RESOLVER_URL, DOIServiceBean.DOI_RESOLVER_URL); - identifierString = identifierString.replace(DOIServiceBean.HTTP_DXDOI_RESOLVER_URL, DOIServiceBean.HTTP_DOI_RESOLVER_URL); - index1 = (identifierString.startsWith(DOIServiceBean.DOI_RESOLVER_URL)) ? DOIServiceBean.DOI_RESOLVER_URL.length() - 1 : DOIServiceBean.HTTP_DOI_RESOLVER_URL.length() - 1; + protocol = AbstractDOIProvider.DOI_PROTOCOL; + identifierString = identifierString.replace(AbstractDOIProvider.DXDOI_RESOLVER_URL, AbstractDOIProvider.DOI_RESOLVER_URL); + identifierString = identifierString.replace(AbstractDOIProvider.HTTP_DXDOI_RESOLVER_URL, AbstractDOIProvider.HTTP_DOI_RESOLVER_URL); + index1 = (identifierString.startsWith(AbstractDOIProvider.DOI_RESOLVER_URL)) ? AbstractDOIProvider.DOI_RESOLVER_URL.length() - 1 : AbstractDOIProvider.HTTP_DOI_RESOLVER_URL.length() - 1; index2 = identifierString.indexOf("/", index1 + 1); - } else if (identifierString.startsWith(PermaLinkPidProviderServiceBean.PERMA_RESOLVER_URL + Dataset.TARGET_URL)) { - protocol = PermaLinkPidProviderServiceBean.PERMA_PROTOCOL; - index1 = PermaLinkPidProviderServiceBean.PERMA_RESOLVER_URL.length() + + Dataset.TARGET_URL.length() - 1; + } else if (identifierString.startsWith(PermaLinkPidProvider.PERMA_RESOLVER_URL + Dataset.TARGET_URL)) { + protocol = PermaLinkPidProvider.PERMA_PROTOCOL; + index1 = PermaLinkPidProvider.PERMA_RESOLVER_URL.length() + + Dataset.TARGET_URL.length() - 1; index2 = identifierString.indexOf("/", index1 + 1); } else { logger.warning("HTTP Url in supplied as the identifier is neither a Handle nor DOI resolver: "+identifierString); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index c17ba909230..39977190691 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -38,6 +38,8 @@ import edu.harvard.iq.dataverse.util.json.JsonParser; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.license.LicenseServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; + import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -421,8 +423,9 @@ public JsonObjectBuilder doImport(DataverseRequest dataverseRequest, Dataverse o // For ImportType.NEW, if the user supplies a global identifier, and it's not a protocol // we support, it will be rejected. 
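+ // With pluggable PID providers, "a protocol we support" now means a configured provider that canManagePID(), not just a match on the instance-wide :Protocol setting.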
+ if (importType.equals(ImportType.NEW)) { - if (ds.getGlobalId().asString() != null && !ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, ""))) { + if (ds.getGlobalId().asString() != null && !PidUtil.getPidProvider(ds.getGlobalId().getProviderId()).canManagePID()) { throw new ImportException("Could not register id " + ds.getGlobalId().asString() + ", protocol not supported"); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthFilter.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthFilter.java index a2cf3082ae7..c93a1496c17 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthFilter.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthFilter.java @@ -29,9 +29,7 @@ public void init(FilterConfig filterConfig) throws ServletException { logger.info(AuthFilter.class.getName() + "initialized. filterConfig.getServletContext().getServerInfo(): " + filterConfig.getServletContext().getServerInfo()); try { - String glassfishLogsDirectory = "logs"; - - FileHandler logFile = new FileHandler(".." + File.separator + glassfishLogsDirectory + File.separator + "authfilter.log"); + FileHandler logFile = new FileHandler( System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "authfilter.log"); SimpleFormatter formatterTxt = new SimpleFormatter(); logFile.setFormatter(formatterTxt); logger.addHandler(logFile); diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java index 496620cd6e8..4a8fb123fd4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/AuthenticationServiceBean.java @@ -21,10 +21,14 @@ import edu.harvard.iq.dataverse.authorization.providers.shib.ShibAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; +import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailData; import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailServiceBean; import edu.harvard.iq.dataverse.passwordreset.PasswordResetData; import edu.harvard.iq.dataverse.passwordreset.PasswordResetServiceBean; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; +import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.validation.PasswordValidatorServiceBean; @@ -118,6 +122,9 @@ public class AuthenticationServiceBean { @EJB SavedSearchServiceBean savedSearchService; + @EJB + PrivateUrlServiceBean privateUrlService; + @PersistenceContext(unitName = "VDCNet-ejbPU") private EntityManager em; @@ -300,11 +307,9 @@ public AuthenticatedUser getUpdateAuthenticatedUser( String authenticationProvid if (user != null && !user.isDeactivated()) { user = userService.updateLastLogin(user); } - + if ( user == null ) { throw new IllegalStateException("Authenticated user does not exist. 
The functionality to support creating one at this point in authentication has been removed."); - //return createAuthenticatedUser( - // new UserRecordIdentifier(authenticationProviderId, resp.getUserId()), resp.getUserId(), resp.getUserDisplayInfo(), true ); } else { if (BuiltinAuthenticationProvider.PROVIDER_ID.equals(user.getAuthenticatedUserLookup().getAuthenticationProviderId())) { return user; @@ -615,6 +620,7 @@ public AuthenticatedUser createAuthenticatedUser(UserRecordIdentifier userRecord String identifier = internalUserIdentifier + i; while ( identifierExists(identifier) ) { i += 1; + identifier = internalUserIdentifier + i; } authenticatedUser.setUserIdentifier(identifier); } else { @@ -931,14 +937,45 @@ public List getWorkflowCommentsByAuthenticatedUser(Authenticat return query.getResultList(); } - public ApiToken getValidApiTokenForUser(AuthenticatedUser user) { + /** + * This method gets a valid api token for an AuthenticatedUser, creating a new + * token if one doesn't exist or if the token is expired. + * + * @param user + * @return + */ + public ApiToken getValidApiTokenForAuthenticatedUser(AuthenticatedUser user) { ApiToken apiToken = null; apiToken = findApiTokenByUser(user); - if ((apiToken == null) || (apiToken.getExpireTime().before(new Date()))) { + if ((apiToken == null) || apiToken.isExpired()) { logger.fine("Created apiToken for user: " + user.getIdentifier()); apiToken = generateApiTokenForUser(user); } return apiToken; } + /** + * Gets a token for an AuthenticatedUser or a PrivateUrlUser. It will create a + * new token if needed for an AuthenticatedUser. Note that, for a PrivateUrlUser, this method creates a token + * with a temporary AuthenticateUser that only has a userIdentifier - needed in generating signed Urls. + * @param user + * @return a token or null (i.e. 
if the user is not an AuthenticatedUser or PrivateUrlUser) + */ + + public ApiToken getValidApiTokenForUser(User user) { + ApiToken apiToken = null; + if (user instanceof AuthenticatedUser) { + apiToken = getValidApiTokenForAuthenticatedUser((AuthenticatedUser) user); + } else if (user instanceof PrivateUrlUser) { + PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; + + PrivateUrl privateUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); + apiToken = new ApiToken(); + apiToken.setTokenString(privateUrl.getToken()); + AuthenticatedUser au = new AuthenticatedUser(); + au.setUserIdentifier(privateUrlUser.getIdentifier()); + apiToken.setAuthenticatedUser(au); + } + return apiToken; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBean.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBean.java index 0fd0852b4df..8f3dc07fdea 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBean.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.authorization.providers.oauth2; import edu.harvard.iq.dataverse.DataverseSession; +import edu.harvard.iq.dataverse.UserServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationProvider; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.UserRecordIdentifier; @@ -65,6 +66,9 @@ public class OAuth2LoginBackingBean implements Serializable { @EJB SystemConfig systemConfig; + @EJB + UserServiceBean userService; + @Inject DataverseSession session; @@ -128,6 +132,7 @@ public void exchangeCodeForToken() throws IOException { } else { // login the user and redirect to HOME of intended page (if any). // setUser checks for deactivated users. 
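+ // Record the login timestamp here as well; previously OAuth2 logins did not update the user's lastLogin.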
+ dvUser = userService.updateLastLogin(dvUser); session.setUser(dvUser); final OAuth2TokenData tokenData = oauthUser.getTokenData(); if (tokenData != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java index 3cbfc3cdcac..d6d3e0317ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/users/AuthenticatedUser.java @@ -16,6 +16,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.json.JsonPrinter; import static edu.harvard.iq.dataverse.util.StringUtil.nonEmpty; + import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import java.io.Serializable; import java.sql.Timestamp; @@ -42,6 +43,7 @@ import jakarta.persistence.PostLoad; import jakarta.persistence.PrePersist; import jakarta.persistence.Transient; +import jakarta.validation.constraints.Min; import jakarta.validation.constraints.NotBlank; import jakarta.validation.constraints.NotNull; @@ -68,7 +70,8 @@ @NamedQuery( name="AuthenticatedUser.filter", query="select au from AuthenticatedUser au WHERE (" + "LOWER(au.userIdentifier) like LOWER(:query) OR " - + "lower(concat(au.firstName,' ',au.lastName)) like lower(:query))"), + + "lower(concat(au.firstName,' ',au.lastName)) like lower(:query) or " + + "lower(au.email) like lower(:query))"), @NamedQuery( name="AuthenticatedUser.findAdminUser", query="select au from AuthenticatedUser au WHERE " + "au.superuser = true " @@ -145,6 +148,10 @@ public class AuthenticatedUser implements User, Serializable { @Transient private Set<Type> mutedNotificationsSet = new HashSet<>(); + @Column(nullable=false) + @Min(value = 1, message = "Rate Limit Tier must be greater than 0.") + private int rateLimitTier = 1; + @PrePersist void prePersist() { mutedNotifications = Type.toStringValue(mutedNotificationsSet); @@ -396,6 +403,13 @@ public void setDeactivatedTime(Timestamp deactivatedTime) { this.deactivatedTime = deactivatedTime; } + public int getRateLimitTier() { + return rateLimitTier; + } + public void setRateLimitTier(int rateLimitTier) { + this.rateLimitTier = rateLimitTier; + } + @OneToOne(mappedBy = "authenticatedUser") private AuthenticatedUserLookup authenticatedUserLookup; @@ -434,7 +448,6 @@ public void setShibIdentityProvider(String shibIdentityProvider) { public JsonObjectBuilder toJson() { //JsonObjectBuilder authenicatedUserJson = Json.createObjectBuilder(); - NullSafeJsonBuilder authenicatedUserJson = NullSafeJsonBuilder.jsonObjectBuilder(); authenicatedUserJson.add("id", this.id); diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java b/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java index f64b5c301e7..03f018221fd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/users/PrivateUrlUser.java @@ -12,7 +12,7 @@ */ public class PrivateUrlUser implements User { - public static final String PREFIX = "#"; + public static final String PREFIX = "!"; /** * In the future, this could probably be dvObjectId rather than datasetId, diff --git a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java index ba34a3d1ed1..af1e9c6a294 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java +++ b/src/main/java/edu/harvard/iq/dataverse/batch/jobs/importer/filesystem/FileRecordWriter.java @@ -33,6 +33,7 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -58,7 +59,6 @@ import java.util.logging.Level; import java.util.logging.Logger; import jakarta.servlet.http.HttpServletRequest; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; @Named @Dependent @@ -360,31 +360,22 @@ private DataFile createPackageDataFile(List files) { if (commandEngine.getContext().systemConfig().isFilePIDsEnabledForCollection(dataset.getOwner())) { - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(packageFile.getProtocol(), commandEngine.getContext()); + PidProvider pidProvider = commandEngine.getContext().dvObjects().getEffectivePidGenerator(dataset); if (packageFile.getIdentifier() == null || packageFile.getIdentifier().isEmpty()) { - packageFile.setIdentifier(idServiceBean.generateDataFileIdentifier(packageFile)); - } - String nonNullDefaultIfKeyNotFound = ""; - String protocol = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - String authority = commandEngine.getContext().settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - if (packageFile.getProtocol() == null) { - packageFile.setProtocol(protocol); - } - if (packageFile.getAuthority() == null) { - packageFile.setAuthority(authority); + pidProvider.generatePid(packageFile); } if (!packageFile.isIdentifierRegistered()) { String doiRetString = ""; - idServiceBean = GlobalIdServiceBean.getBean(commandEngine.getContext()); + try { - doiRetString = idServiceBean.createIdentifier(packageFile); + doiRetString = pidProvider.createIdentifier(packageFile); } catch (Throwable e) { } // Check return value to make sure registration succeeded - if (!idServiceBean.registerWhenPublished() && doiRetString.contains(packageFile.getIdentifier())) { + if (!pidProvider.registerWhenPublished() && doiRetString.contains(packageFile.getIdentifier())) { packageFile.setIdentifierRegistered(true); packageFile.setGlobalIdCreateTime(new Date()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java new file mode 100644 index 00000000000..18269f6970e --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/AbstractRemoteOverlayAccessIO.java @@ -0,0 +1,338 @@ +package edu.harvard.iq.dataverse.dataaccess; + +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; +import java.nio.channels.Channel; +import java.nio.file.Path; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.util.List; +import java.util.function.Predicate; +import java.util.logging.Logger; + +import javax.net.ssl.SSLContext; + +import org.apache.http.client.config.CookieSpecs; +import org.apache.http.client.config.RequestConfig; +import 
org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.ssl.SSLContextBuilder; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObject; + + +/** + * A base class for StorageIO implementations supporting remote access. At present, that includes the RemoteOverlayAccessIO store and the newer GlobusOverlayAccessIO store. It primarily includes + * common methods for handling auxiliary files in the configured base store. + * @param <T> + */ +public abstract class AbstractRemoteOverlayAccessIO<T extends DvObject> extends StorageIO<T> { + + protected static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); + public static final String REFERENCE_ENDPOINTS_WITH_BASEPATHS = "reference-endpoints-with-basepaths"; + static final String BASE_STORE = "base-store"; + protected static final String SECRET_KEY = "secret-key"; + static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; + protected static final String REMOTE_STORE_NAME = "remote-store-name"; + protected static final String REMOTE_STORE_URL = "remote-store-url"; + + // Whether Dataverse can access the file bytes + // Currently False only for the Globus store when using the S3Connector, and Remote Stores like simple web servers where the URLs resolve to the actual file bits + static final String FILES_NOT_ACCESSIBLE_BY_DATAVERSE = "files-not-accessible-by-dataverse"; + + protected StorageIO<DvObject> baseStore = null; + protected String path = null; + protected PoolingHttpClientConnectionManager cm = null; + CloseableHttpClient httpclient = null; + protected static HttpClientContext localContext = HttpClientContext.create(); + + protected int timeout = 1200; + protected RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) + .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) + .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); + protected static boolean trustCerts = false; + protected int httpConcurrency = 4; + + public static String getBaseStoreIdFor(String driverId) { + return getConfigParamForDriver(driverId, BASE_STORE); + } + + public AbstractRemoteOverlayAccessIO() { + super(); + } + + public AbstractRemoteOverlayAccessIO(String storageLocation, String driverId) { + super(storageLocation, driverId); + } + + public AbstractRemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) { + super(dvObject, req, driverId); + } + + @Override + public Channel openAuxChannel(String auxItemTag, DataAccessOption... 
options) throws IOException { + return baseStore.openAuxChannel(auxItemTag, options); + } + + @Override + public boolean isAuxObjectCached(String auxItemTag) throws IOException { + return baseStore.isAuxObjectCached(auxItemTag); + } + + @Override + public long getAuxObjectSize(String auxItemTag) throws IOException { + return baseStore.getAuxObjectSize(auxItemTag); + } + + @Override + public Path getAuxObjectAsPath(String auxItemTag) throws IOException { + return baseStore.getAuxObjectAsPath(auxItemTag); + } + + @Override + public void backupAsAux(String auxItemTag) throws IOException { + baseStore.backupAsAux(auxItemTag); + } + + @Override + public void revertBackupAsAux(String auxItemTag) throws IOException { + baseStore.revertBackupAsAux(auxItemTag); + } + + @Override + public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { + baseStore.savePathAsAux(fileSystemPath, auxItemTag); + } + + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); + } + + /** + * @param inputStream InputStream we want to save + * @param auxItemTag String representing this Auxiliary type ("extension") + * @throws IOException if anything goes wrong. + */ + @Override + public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { + baseStore.saveInputStreamAsAux(inputStream, auxItemTag); + } + + @Override + public List<String> listAuxObjects() throws IOException { + return baseStore.listAuxObjects(); + } + + @Override + public void deleteAuxObject(String auxItemTag) throws IOException { + baseStore.deleteAuxObject(auxItemTag); + } + + @Override + public void deleteAllAuxObjects() throws IOException { + baseStore.deleteAllAuxObjects(); + } + + @Override + public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { + return baseStore.getAuxFileAsInputStream(auxItemTag); + } + + protected int getUrlExpirationMinutes() { + String optionValue = getConfigParam(URL_EXPIRATION_MINUTES); + if (optionValue != null) { + Integer num; + try { + num = Integer.parseInt(optionValue); + } catch (NumberFormatException ex) { + num = null; + } + if (num != null) { + return num; + } + } + return 60; + } + + public CloseableHttpClient getSharedHttpClient() { + if (httpclient == null) { + try { + initHttpPool(); + httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); + + } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { + logger.warning(ex.getMessage()); + } + } + return httpclient; + } + + private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { + if (trustCerts) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + SSLContext sslContext; + SSLConnectionSocketFactory connectionFactory; + + sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); + // create an SSL Socket Factory to use the SSLContext with the trust self signed + // certificate strategy + // and allow all hosts verifier. 
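+ // Note: trust-all certificates plus NoopHostnameVerifier disables TLS validation entirely; intended only for dev/test endpoints with self-signed certs.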
+ connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); + + Registry registry = RegistryBuilder.create() + .register("https", connectionFactory).build(); + cm = new PoolingHttpClientConnectionManager(registry); + } else { + cm = new PoolingHttpClientConnectionManager(); + } + cm.setDefaultMaxPerRoute(httpConcurrency); + cm.setMaxTotal(httpConcurrency > 20 ? httpConcurrency : 20); + } + + @Override + abstract public long retrieveSizeFromMedia(); + + @Override + public boolean exists() { + logger.fine("Exists called"); + return (retrieveSizeFromMedia() != -1); + } + + @Override + public List cleanUp(Predicate filter, boolean dryRun) throws IOException { + return baseStore.cleanUp(filter, dryRun); + } + + @Override + public String getStorageLocation() throws IOException { + String fullStorageLocation = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStorageLocation); + int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); + if (driverIndex >= 0) { + fullStorageLocation = fullStorageLocation + .substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + if (this.getDvObject() instanceof Dataset) { + throw new IOException("AbstractRemoteOverlayAccessIO: Datasets are not a supported dvObject"); + } else if (this.getDvObject() instanceof DataFile) { + fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; + } else if (dvObject instanceof Dataverse) { + throw new IOException("AbstractRemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStorageLocation: " + fullStorageLocation); + return fullStorageLocation; + } + protected void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { + + if (baseStore == null) { + String baseDriverId = getBaseStoreIdFor(driverId); + String fullStorageLocation = null; + String baseDriverType = getConfigParamForDriver(baseDriverId, StorageIO.TYPE, + DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); + + if (dvObject instanceof Dataset) { + baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); + } else { + if (this.getDvObject() != null) { + fullStorageLocation = getStoragePath(); + + // S3 expects :/// + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") + + "/" + fullStorageLocation; + break; + default: + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); + throw new IOException("Not supported"); + } + + } else if (storageLocation != null) { + // ://// + // remoteDriverId:// is removed if coming through directStorageIO + int index = storageLocation.indexOf(DataAccess.SEPARATOR); + if (index > 0) { + storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); + } + // The base store needs the baseStoreIdentifier and not the relative URL (if it exists) + int endOfId = storageLocation.indexOf("//"); + fullStorageLocation = (endOfId>-1) ? 
storageLocation.substring(0, endOfId) : storageLocation; + + switch (baseDriverType) { + case DataAccess.S3: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, S3AccessIO.BUCKET_NAME) + "/" + + fullStorageLocation; + break; + case DataAccess.FILE: + fullStorageLocation = baseDriverId + DataAccess.SEPARATOR + + getConfigParamForDriver(baseDriverId, FileAccessIO.DIRECTORY, "/tmp/files") + + "/" + fullStorageLocation; + break; + default: + logger.warning("Not Supported: " + this.getClass().getName() + " store with base store type: " + + getConfigParamForDriver(baseDriverId, StorageIO.TYPE)); + throw new IOException("Not supported"); + } + } + baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); + } + if (baseDriverType.contentEquals(DataAccess.S3)) { + ((S3AccessIO) baseStore).setMainDriver(false); + } + } + remoteStoreName = getConfigParam(REMOTE_STORE_NAME); + try { + remoteStoreUrl = new URL(getConfigParam(REMOTE_STORE_URL)); + } catch (MalformedURLException mfue) { + logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); + } + } + + protected String getStoragePath() throws IOException { + String fullStoragePath = dvObject.getStorageIdentifier(); + logger.fine("storageidentifier: " + fullStoragePath); + int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); + if (driverIndex >= 0) { + fullStoragePath = fullStoragePath + .substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); + } + int suffixIndex = fullStoragePath.indexOf("//"); + if (suffixIndex >= 0) { + fullStoragePath = fullStoragePath.substring(0, suffixIndex); + } + if (getDvObject() instanceof Dataset) { + fullStoragePath = getDataset().getAuthorityForFileStorage() + "/" + + getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (getDvObject() instanceof DataFile) { + fullStoragePath = getDataFile().getOwner().getAuthorityForFileStorage() + "/" + + getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; + } else if (dvObject instanceof Dataverse) { + throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); + } + logger.fine("fullStoragePath: " + fullStoragePath); + return fullStoragePath; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java index d046fa4661d..bc4c69390cf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/DataAccess.java @@ -48,6 +48,7 @@ public DataAccess() { public static final String S3 = "s3"; static final String SWIFT = "swift"; static final String REMOTE = "remote"; + public static final String GLOBUS = "globus"; static final String TMP = "tmp"; public static final String SEPARATOR = "://"; //Default to "file" is for tests only @@ -98,6 +99,8 @@ protected static StorageIO getStorageIO(T dvObject, Data return new SwiftAccessIO<>(dvObject, req, storageDriverId); case REMOTE: return new RemoteOverlayAccessIO<>(dvObject, req, storageDriverId); + case GLOBUS: + return new GlobusOverlayAccessIO<>(dvObject, req, storageDriverId); case TMP: throw new IOException( "DataAccess IO attempted on a temporary file that hasn't been permanently saved yet."); @@ -129,6 +132,8 @@ public static StorageIO getDirectStorageIO(String fullStorageLocation) return new SwiftAccessIO<>(storageLocation, 
storageDriverId); case REMOTE: return new RemoteOverlayAccessIO<>(storageLocation, storageDriverId); + case GLOBUS: + return new GlobusOverlayAccessIO<>(storageLocation, storageDriverId); default: logger.warning("Could not find storage driver for: " + fullStorageLocation); throw new IOException("getDirectStorageIO: Unsupported storage method."); @@ -148,19 +153,41 @@ public static String[] getDriverIdAndStorageLocation(String storageLocation) { } public static String getStorageIdFromLocation(String location) { - if(location.contains(SEPARATOR)) { - //It's a full location with a driverId, so strip and reapply the driver id - //NOte that this will strip the bucketname out (which s3 uses) but the S3IOStorage class knows to look at re-insert it - return location.substring(0,location.indexOf(SEPARATOR) +3) + location.substring(location.lastIndexOf('/')+1); - } - return location.substring(location.lastIndexOf('/')+1); + if (location.contains(SEPARATOR)) { + // It's a full location with a driverId, so strip and reapply the driver id + // NOte that this will strip the bucketname out (which s3 uses) but the + // S3IOStorage class knows to look at re-insert it + return location.substring(0, location.indexOf(SEPARATOR) + 3) + + location.substring(location.lastIndexOf('/') + 1); + } + return location.substring(location.lastIndexOf('/') + 1); + } + + /** Changes storageidentifiers of the form + * s3://bucketname/18b39722140-50eb7d3c5ece or file://18b39722140-50eb7d3c5ece to s3://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece + * and + * 18b39722140-50eb7d3c5ece to 10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece + * @param id + * @param dataset + * @return + */ + public static String getLocationFromStorageId(String id, Dataset dataset) { + String path= dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + "/"; + if (id.contains(SEPARATOR)) { + // It's a full location with a driverId, so strip and reapply the driver id + // NOte that this will strip the bucketname out (which s3 uses) but the + // S3IOStorage class knows to look at re-insert it + return id.substring(0, id.indexOf(SEPARATOR) + 3) + path + + id.substring(id.lastIndexOf('/') + 1); + } + return path + id.substring(id.lastIndexOf('/') + 1); } public static String getDriverType(String driverId) { if(driverId.isEmpty() || driverId.equals("tmp")) { return "tmp"; } - return System.getProperty("dataverse.files." + driverId + ".type", "Undefined"); + return StorageIO.getConfigParamForDriver(driverId, StorageIO.TYPE, "Undefined"); } //This @@ -168,7 +195,7 @@ public static String getDriverPrefix(String driverId) throws IOException { if(driverId.isEmpty() || driverId.equals("tmp")) { return "tmp" + SEPARATOR; } - String storageType = System.getProperty("dataverse.files." 
+ driverId + ".type", "Undefined"); + String storageType = StorageIO.getConfigParamForDriver(driverId, StorageIO.TYPE, "Undefined"); switch(storageType) { case FILE: return FileAccessIO.getDriverPrefix(driverId); @@ -236,12 +263,17 @@ public static StorageIO createNewStorageIO(T dvObject, S storageIO = new S3AccessIO<>(dvObject, null, storageDriverId); break; case REMOTE: - storageIO = createNewStorageIO(dvObject, storageTag, RemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ; + case GLOBUS: + storageIO = createNewStorageIO(dvObject, storageTag, AbstractRemoteOverlayAccessIO.getBaseStoreIdFor(storageDriverId)) ; break; default: logger.warning("Could not find storage driver for: " + storageTag); throw new IOException("createDataAccessObject: Unsupported storage method " + storageDriverId); } + if (storageIO == null) { + logger.warning("Could not find storage driver for: " + storageTag); + throw new IOException("createDataAccessObject: Unsupported storage method " + storageDriverId); + } // Note: All storageIO classes must assure that dvObject instances' storageIdentifiers are prepended with // the :// + any additional storageIO type information required (e.g. the bucketname for s3/swift) // This currently happens when the storageIO is opened for write access @@ -369,9 +401,35 @@ public static boolean isValidDirectStorageIdentifier(String storageId) { return S3AccessIO.isValidIdentifier(driverId, storageId); case REMOTE: return RemoteOverlayAccessIO.isValidIdentifier(driverId, storageId); + case GLOBUS: + return GlobusOverlayAccessIO.isValidIdentifier(driverId, storageId); default: logger.warning("Request to validate for storage driver: " + driverId); } return false; } + + + + public static String getNewStorageIdentifier(String driverId) { + String storageType = DataAccess.getDriverType(driverId); + if (storageType.equals("tmp") || storageType.equals("Undefined")) { + return null; + } + switch (storageType) { + case FILE: + return FileAccessIO.getNewIdentifier(driverId); + case SWIFT: + return SwiftAccessIO.getNewIdentifier(driverId); + case S3: + return S3AccessIO.getNewIdentifier(driverId); + case REMOTE: + return RemoteOverlayAccessIO.getNewIdentifier(driverId); + case GLOBUS: + return GlobusOverlayAccessIO.getNewIdentifier(driverId); + default: + logger.warning("Request to validate for storage driver: " + driverId); + } + return null; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java index d95df1567bd..26637ec5742 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/FileAccessIO.java @@ -35,8 +35,6 @@ import java.util.List; import java.util.function.Predicate; import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.util.stream.Collectors; // Dataverse imports: @@ -55,6 +53,7 @@ public class FileAccessIO extends StorageIO { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.FileAccessIO"); + public static final String DIRECTORY = "directory"; public FileAccessIO() { @@ -115,13 +114,14 @@ public void open (DataAccessOption... 
options) throws IOException { this.setInputStream(fin); setChannel(fin.getChannel()); - this.setSize(getLocalFileSize()); + this.setSize(retrieveSizeFromMedia()); if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); @@ -506,21 +506,6 @@ public void delete() throws IOException { // Auxilary helper methods, filesystem access-specific: - private long getLocalFileSize () { - long fileSize = -1; - - try { - File testFile = getFileSystemPath().toFile(); - if (testFile != null) { - fileSize = testFile.length(); - } - return fileSize; - } catch (IOException ex) { - return -1; - } - - } - public FileInputStream openLocalFileAsInputStream () { FileInputStream in; @@ -595,7 +580,7 @@ private String getDatasetDirectory() throws IOException { protected String getFilesRootDirectory() { - String filesRootDirectory = System.getProperty("dataverse.files." + this.driverId + ".directory", "/tmp/files"); + String filesRootDirectory = getConfigParam(DIRECTORY, "/tmp/files"); return filesRootDirectory; } @@ -742,4 +727,18 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE return toDelete; } + @Override + public long retrieveSizeFromMedia() { + long fileSize = -1; + try { + File testFile = getFileSystemPath().toFile(); + if (testFile != null) { + fileSize = testFile.length(); + } + return fileSize; + } catch (IOException ex) { + return -1; + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java new file mode 100644 index 00000000000..8bed60d8302 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/GlobusAccessibleStore.java @@ -0,0 +1,78 @@ +package edu.harvard.iq.dataverse.dataaccess; + +import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; + +public interface GlobusAccessibleStore { + + //Whether Dataverse manages access controls for the Globus endpoint or not. + static final String MANAGED = "managed"; + /* + * transfer and reference endpoint formats: + * + * REFERENCE_ENDPOINTS_WITH_BASEPATHS - reference endpoints separated by a comma + */ + static final String TRANSFER_ENDPOINT_WITH_BASEPATH = "transfer-endpoint-with-basepath"; + static final String GLOBUS_TOKEN = "globus-token"; + + public static boolean isDataverseManaged(String driverId) { + return Boolean.parseBoolean(StorageIO.getConfigParamForDriver(driverId, MANAGED)); + } + + public static String getTransferEnpointWithPath(String driverId) { + return StorageIO.getConfigParamForDriver(driverId, GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH); + } + + public static String getTransferEndpointId(String driverId) { + String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, TRANSFER_ENDPOINT_WITH_BASEPATH); + int pathStart = endpointWithBasePath.indexOf("/"); + return pathStart > 0 ? 
endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; + } + + public static String getTransferPath(String driverId) { + String endpointWithBasePath = StorageIO.getConfigParamForDriver(driverId, TRANSFER_ENDPOINT_WITH_BASEPATH); + int pathStart = endpointWithBasePath.indexOf("/"); + return pathStart > 0 ? endpointWithBasePath.substring(pathStart) : ""; + + } + + public static JsonArray getReferenceEndpointsWithPaths(String driverId) { + String[] endpoints = StorageIO.getConfigParamForDriver(driverId, AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*"); + JsonArrayBuilder builder = Json.createArrayBuilder(); + for(int i=0;i://// + * + * Storage location: + * / + * + * Internal StorageIdentifier format: + * :// + * + * Storage location: + * /// + * + */ +public class GlobusOverlayAccessIO extends AbstractRemoteOverlayAccessIO implements GlobusAccessibleStore { + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIO"); + + /* + * If this is set to true, the store supports Globus transfer in and + * Dataverse/the globus app manage file locations, access controls, deletion, + * etc. + */ + private Boolean dataverseManaged = null; + + private String relativeDirectoryPath; + + private String endpointPath; + + private String filename; + + private String[] allowedEndpoints; + private String endpoint; + + public GlobusOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { + super(dvObject, req, driverId); + configureGlobusEndpoints(); + configureStores(req, driverId, null); + logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); + path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(path); + + logger.fine("Relative path: " + path); + } + + + public GlobusOverlayAccessIO(String storageLocation, String driverId) throws IOException { + this.driverId = driverId; + configureGlobusEndpoints(); + configureStores(null, driverId, storageLocation); + if (isManaged()) { + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageLocation); + path = parts[1]; + } else { + this.setIsLocalFile(false); + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Referenced path: " + path); + } + } + + private boolean isManaged() { + if(dataverseManaged==null) { + dataverseManaged = GlobusAccessibleStore.isDataverseManaged(this.driverId); + } + return dataverseManaged; + } + + private String retrieveGlobusAccessToken() { + String globusToken = getConfigParam(GlobusAccessibleStore.GLOBUS_TOKEN); + + + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); + return accessToken.getOtherTokens().get(0).getAccessToken(); + } + + private void parsePath() { + int filenameStart = path.lastIndexOf("/") + 1; + String endpointWithBasePath = null; + if (!isManaged()) { + endpointWithBasePath = findMatchingEndpoint(path, allowedEndpoints); + } else { + endpointWithBasePath = allowedEndpoints[0]; + } + //String endpointWithBasePath = baseEndpointPath.substring(baseEndpointPath.lastIndexOf(DataAccess.SEPARATOR) + 3); + int pathStart = endpointWithBasePath.indexOf("/"); + logger.fine("endpointWithBasePath: " + endpointWithBasePath); + endpointPath = "/" + (pathStart > 0 ? 
endpointWithBasePath.substring(pathStart + 1) : ""); + logger.fine("endpointPath: " + endpointPath); + + + if (isManaged() && (dvObject!=null)) { + + Dataset ds = null; + if (dvObject instanceof Dataset) { + ds = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + ds = ((DataFile) dvObject).getOwner(); + } + relativeDirectoryPath = "/" + ds.getAuthority() + "/" + ds.getIdentifier(); + } else { + relativeDirectoryPath = ""; + } + if (filenameStart > 0) { + relativeDirectoryPath = relativeDirectoryPath + path.substring(0, filenameStart); + } + logger.fine("relativeDirectoryPath finally: " + relativeDirectoryPath); + filename = path.substring(filenameStart); + endpoint = pathStart > 0 ? endpointWithBasePath.substring(0, pathStart) : endpointWithBasePath; + + + } + + private static String findMatchingEndpoint(String path, String[] allowedEndpoints) { + for(int i=0;i= 0) { + this.setSize(dataFile.getFilesize()); + } else { + logger.fine("Setting size"); + this.setSize(retrieveSizeFromMedia()); + } + // Only applies for the S3 Connector case (where we could have run an ingest) + if (dataFile.getContentType() != null + && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() + && dataFile.getDataTable() != null + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { + + List datavariables = dataFile.getDataTable().getDataVariables(); + String varHeaderLine = generateVariableHeader(datavariables); + this.setVarHeader(varHeaderLine); + } + + } + + this.setMimeType(dataFile.getContentType()); + + try { + this.setFileName(dataFile.getFileMetadata().getLabel()); + } catch (Exception ex) { + this.setFileName("unknown"); + } + } else if (dvObject instanceof Dataset) { + throw new IOException( + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); + } else if (dvObject instanceof Dataverse) { + throw new IOException( + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); + } + } + + @Override + public Path getFileSystemPath() throws IOException { + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); + } + + @Override + public void savePath(Path fileSystemPath) throws IOException { + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); + } + + @Override + public void saveInputStream(InputStream inputStream) throws IOException { + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); + } + + @Override + public void saveInputStream(InputStream inputStream, Long filesize) throws IOException { + throw new UnsupportedDataAccessOperationException( + this.getClass().getName() + ": savePath() not implemented in this storage driver."); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java index 2b4aed3a9a5..2435e3f778a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/ImageThumbConverter.java @@ -35,20 +35,21 @@ import javax.imageio.stream.ImageOutputStream; import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataFileServiceBean; import 
edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.ByteArrayOutputStream; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.OutputStream; import java.nio.channels.Channel; import java.nio.channels.Channels; -import java.nio.channels.FileChannel; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; import java.nio.file.Path; import java.nio.file.Paths; import java.util.logging.Logger; + +import jakarta.enterprise.inject.spi.CDI; import org.apache.commons.io.IOUtils; //import org.primefaces.util.Base64; import java.util.Base64; @@ -109,20 +110,38 @@ private static boolean isThumbnailAvailable(StorageIO storageIO, int s return false; } + // check if thumbnail generation failed: + if (file.isPreviewImageFail()) { + logger.fine("Thumbnail failed to be generated for "+ file.getId()); + return false; + } + if (isThumbnailCached(storageIO, size)) { + logger.fine("Found cached thumbnail for " + file.getId()); return true; } + return generateThumbnail(file, storageIO, size); - logger.fine("Checking for thumbnail, file type: " + file.getContentType()); + } - if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { - return generateImageThumbnail(storageIO, size); - } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { - return generatePDFThumbnail(storageIO, size); + private static boolean generateThumbnail(DataFile file, StorageIO storageIO, int size) { + logger.fine((file.isPreviewImageFail() ? "Not trying" : "Trying") + " to generate thumbnail, file id: " + file.getId()); + boolean thumbnailGenerated = false; + // Don't try to generate if there have been failures: + if (!file.isPreviewImageFail()) { + if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { + thumbnailGenerated = generateImageThumbnail(storageIO, size); + } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { + thumbnailGenerated = generatePDFThumbnail(storageIO, size); + } + if (!thumbnailGenerated) { + file.setPreviewImageFail(true); + file.setPreviewImageAvailable(false); + logger.fine("No thumbnail generated for " + file.getId()); + } } - return false; - + return thumbnailGenerated; } // Note that this method works on ALL file types for which thumbnail @@ -153,15 +172,30 @@ public static InputStreamIO getImageThumbnailAsInputStream(StorageIO s return null; } int cachedThumbnailSize = (int) storageIO.getAuxObjectSize(THUMBNAIL_SUFFIX + size); + InputStreamIO inputStreamIO = cachedThumbnailSize > 0 ? new InputStreamIO(cachedThumbnailInputStream, cachedThumbnailSize) : null; - InputStreamIO inputStreamIO = new InputStreamIO(cachedThumbnailInputStream, cachedThumbnailSize); - - inputStreamIO.setMimeType(THUMBNAIL_MIME_TYPE); + if (inputStreamIO != null) { + inputStreamIO.setMimeType(THUMBNAIL_MIME_TYPE); - String fileName = storageIO.getFileName(); - if (fileName != null) { - fileName = fileName.replaceAll("\\.[^\\.]*$", THUMBNAIL_FILE_EXTENSION); - inputStreamIO.setFileName(fileName); + String fileName = storageIO.getFileName(); + if (fileName != null) { + fileName = fileName.replaceAll("\\.[^\\.]*$", THUMBNAIL_FILE_EXTENSION); + inputStreamIO.setFileName(fileName); + } + } else { + if (storageIO.getDataFile() != null && cachedThumbnailSize == 0) { + // We found an older 0 length thumbnail. Newer image uploads will not have this issue. 
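+ // (Zero-length aux files like this are typically residue of an earlier thumbnail rescale that failed part-way through without cleaning up after itself.)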
+ // Once cleaned up, this thumbnail will no longer have this issue + logger.warning("Cleaning up zero sized thumbnail ID: "+ storageIO.getDataFile().getId()); + storageIO.getDataFile().setPreviewImageFail(true); + storageIO.getDataFile().setPreviewImageAvailable(false); + DataFileServiceBean datafileService = CDI.current().select(DataFileServiceBean.class).get(); + datafileService.save(storageIO.getDataFile()); + + // Now that we have marked this File as a thumbnail failure, + // no reason not to try and delete this 0-size cache here: + storageIO.deleteAuxObject(THUMBNAIL_SUFFIX + size); + } } return inputStreamIO; } catch (Exception ioex) { @@ -184,6 +218,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s // We rely on ImageMagick to convert PDFs; so if it's not installed, // better give up right away: if (!isImageMagickInstalled()) { + logger.fine("Couldn't find ImageMagick"); return false; } @@ -195,6 +230,7 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s // will run the ImageMagick on it, and will save its output in another temp // file, and will save it as an "auxiliary" file via the driver. boolean tempFilesRequired = false; + File tempFile = null; try { Path pdfFilePath = storageIO.getFileSystemPath(); @@ -206,35 +242,33 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s tempFilesRequired = true; } catch (IOException ioex) { + logger.warning(ioex.getMessage()); // this on the other hand is likely a fatal condition :( return false; } if (tempFilesRequired) { - ReadableByteChannel pdfFileChannel; - + InputStream inputStream = null; try { storageIO.open(); - //inputStream = storageIO.getInputStream(); - pdfFileChannel = storageIO.getReadChannel(); + inputStream = storageIO.getInputStream(); } catch (Exception ioex) { logger.warning("caught Exception trying to open an input stream for " + storageIO.getDataFile().getStorageIdentifier()); return false; } - File tempFile; - FileChannel tempFileChannel = null; + OutputStream outputStream = null; try { tempFile = File.createTempFile("tempFileToRescale", ".tmp"); - tempFileChannel = new FileOutputStream(tempFile).getChannel(); - - tempFileChannel.transferFrom(pdfFileChannel, 0, storageIO.getSize()); + outputStream = new FileOutputStream(tempFile); + //Reads/transfers all bytes from the input stream to the output stream. 
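+ // (InputStream.transferTo(OutputStream), available since Java 9, copies through an internal buffer, so the whole PDF never has to be held in memory at once.)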
+ inputStream.transferTo(outputStream); } catch (IOException ioex) { logger.warning("GenerateImageThumb: failed to save pdf bytes in a temporary file."); return false; } finally { - IOUtils.closeQuietly(tempFileChannel); - IOUtils.closeQuietly(pdfFileChannel); + IOUtils.closeQuietly(inputStream); + IOUtils.closeQuietly(outputStream); } sourcePdfFile = tempFile; } @@ -258,6 +292,12 @@ private static boolean generatePDFThumbnail(StorageIO storageIO, int s logger.warning("failed to save generated pdf thumbnail, as AUX file " + THUMBNAIL_SUFFIX + size + "!"); return false; } + finally { + try { + tempFile.delete(); + } + catch (Exception e) {} + } } return true; @@ -289,6 +329,7 @@ private static boolean generateImageThumbnail(StorageIO storageIO, int private static boolean generateImageThumbnailFromInputStream(StorageIO storageIO, int size, InputStream inputStream) { BufferedImage fullSizeImage; + boolean thumbnailGenerated = false; try { logger.fine("attempting to read the image file with ImageIO.read(InputStream), " + storageIO.getDataFile().getStorageIdentifier()); @@ -341,26 +382,35 @@ private static boolean generateImageThumbnailFromInputStream(StorageIO try { rescaleImage(fullSizeImage, width, height, size, outputStream); - /* - // while we are at it, let's make sure other size thumbnails are - // generated too: - for (int s : (new int[]{DEFAULT_PREVIEW_SIZE, DEFAULT_THUMBNAIL_SIZE, DEFAULT_CARDIMAGE_SIZE})) { - if (size != s && !thumbnailFileExists(fileLocation, s)) { - rescaleImage(fullSizeImage, width, height, s, fileLocation); - } - } - */ if (tempFileRequired) { storageIO.savePathAsAux(Paths.get(tempFile.getAbsolutePath()), THUMBNAIL_SUFFIX + size); } + thumbnailGenerated = true; } catch (Exception ioex) { logger.warning("Failed to rescale and/or save the image: " + ioex.getMessage()); - return false; + thumbnailGenerated = false; + } + finally { + if(tempFileRequired) { + try { + tempFile.delete(); + } + catch (Exception e) {} + } else if (!thumbnailGenerated) { + // if it was a local file - let's make sure we are not leaving + // behind a half-baked, broken image - such as a 0-size file - + // if this was a failure. 
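+ // (Deleting the aux object here also keeps isThumbnailCached() from treating a 0-size leftover as a valid cached thumbnail on later requests.)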
+ try { + storageIO.deleteAuxObject(THUMBNAIL_SUFFIX + size); + } catch (IOException ioex) { + logger.fine("Failed attempt to delete the result of a failed thumbnail rescaling; this is most likely ok - for ex., because it was never created in the first place."); + } + } } - return true; + return thumbnailGenerated; } @@ -436,16 +486,8 @@ public static String getImageThumbnailAsBase64(DataFile file, int size) { if (cachedThumbnailChannel == null) { logger.fine("Null channel for aux object " + THUMBNAIL_SUFFIX + size); - // try to generate, if not available: - boolean generated = false; - if (file.getContentType().substring(0, 6).equalsIgnoreCase("image/")) { - generated = generateImageThumbnail(storageIO, size); - } else if (file.getContentType().equalsIgnoreCase("application/pdf")) { - generated = generatePDFThumbnail(storageIO, size); - } - - if (generated) { - // try to open again: + // try to generate, if not available and hasn't failed before + if(generateThumbnail(file, storageIO, size)) { try { cachedThumbnailChannel = storageIO.openAuxChannel(THUMBNAIL_SUFFIX + size); } catch (Exception ioEx) { @@ -526,12 +568,10 @@ private static String getImageAsBase64FromInputStream(InputStream inputStream) { public static String getImageAsBase64FromFile(File imageFile) { InputStream imageInputStream = null; try { - - int imageSize = (int) imageFile.length(); - - imageInputStream = new FileInputStream(imageFile); - - return getImageAsBase64FromInputStream(imageInputStream); //, imageSize); + if (imageFile.length() > 0) { + imageInputStream = new FileInputStream(imageFile); + return getImageAsBase64FromInputStream(imageInputStream); + } } catch (IOException ex) { // too bad - but not fatal logger.warning("getImageAsBase64FromFile: Failed to read data from thumbnail file"); @@ -591,16 +631,12 @@ public static String generateImageThumbnailFromFile(String fileLocation, int siz logger.fine("image dimensions: " + width + "x" + height); - thumbFileLocation = rescaleImage(fullSizeImage, width, height, size, fileLocation); + return rescaleImage(fullSizeImage, width, height, size, fileLocation); - if (thumbFileLocation != null) { - return thumbFileLocation; - } } catch (Exception e) { logger.warning("Failed to read in an image from " + fileLocation + ": " + e.getMessage()); } return null; - } /* @@ -639,10 +675,14 @@ public static String rescaleImage(BufferedImage fullSizeImage, int width, int he try { rescaleImage(fullSizeImage, width, height, size, outputFileStream); } catch (Exception ioex) { - logger.warning("caught Exceptiopn trying to create rescaled image " + outputLocation); - return null; + logger.warning("caught Exception trying to create rescaled image " + outputLocation); + outputLocation = null; } finally { IOUtils.closeQuietly(outputFileStream); + // delete the file if the rescaleImage failed + if (outputLocation == null) { + outputFile.delete(); + } } return outputLocation; @@ -698,13 +738,19 @@ private static void rescaleImage(BufferedImage fullSizeImage, int width, int hei if (iter.hasNext()) { writer = (ImageWriter) iter.next(); } else { - throw new IOException("Failed to locatie ImageWriter plugin for image type PNG"); + throw new IOException("Failed to locate ImageWriter plugin for image type PNG"); } - BufferedImage lowRes = new BufferedImage(thumbWidth, thumbHeight, BufferedImage.TYPE_INT_ARGB); - Graphics2D g2 = lowRes.createGraphics(); - g2.drawImage(thumbImage, 0, 0, null); - g2.dispose(); + BufferedImage lowRes = null; + try { + lowRes = new BufferedImage(thumbWidth, thumbHeight, 
BufferedImage.TYPE_INT_ARGB); + Graphics2D g2 = lowRes.createGraphics(); + g2.drawImage(thumbImage, 0, 0, null); + g2.dispose(); + } catch (Exception ex) { + logger.warning("Failed to create LoRes Image: " + ex.getMessage()); + throw new IOException("Caught exception trying to generate thumbnail: " + ex.getMessage()); + } try (ImageOutputStream ios = ImageIO.createImageOutputStream(outputStream);) { @@ -757,7 +803,7 @@ public static String generatePDFThumbnailFromFile(String fileLocation, int size) try { fileSize = new File(fileLocation).length(); } catch (Exception ex) { - // + logger.warning("Can't open file: " + fileLocation); } if (fileSize == 0 || fileSize > sizeLimit) { @@ -820,6 +866,7 @@ public static String generatePDFThumbnailFromFile(String fileLocation, int size) // generate the thumbnail for the requested size, *using the already scaled-down // 400x400 png version, above*: + // (the "exists()" check below appears to be unnecessary - we've already checked early on - ?) if (!((new File(thumbFileLocation)).exists())) { thumbFileLocation = runImageMagick(imageMagickExec, previewFileLocation, thumbFileLocation, size, "png"); } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java index be6f9df0254..de392b74cca 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/InputStreamIO.java @@ -165,4 +165,9 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE throw new UnsupportedDataAccessOperationException("InputStreamIO: tthis method is not supported in this DataAccess driver."); } + @Override + public long retrieveSizeFromMedia() throws UnsupportedDataAccessOperationException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java index 66c6a4cc2ee..bca70259cb7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIO.java @@ -11,105 +11,77 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; -import java.net.URL; import java.nio.channels.Channel; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; import java.nio.channels.WritableByteChannel; import java.nio.file.Path; -import java.security.KeyManagementException; -import java.security.KeyStoreException; -import java.security.NoSuchAlgorithmException; import java.util.List; -import java.util.function.Predicate; -import java.util.logging.Logger; import org.apache.http.Header; -import org.apache.http.client.config.CookieSpecs; -import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpHead; -import org.apache.http.client.protocol.HttpClientContext; -import org.apache.http.config.Registry; -import org.apache.http.config.RegistryBuilder; -import org.apache.http.conn.socket.ConnectionSocketFactory; -import 
org.apache.http.conn.ssl.NoopHostnameVerifier; -import org.apache.http.conn.ssl.SSLConnectionSocketFactory; -import org.apache.http.conn.ssl.TrustAllStrategy; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.protocol.HTTP; -import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.util.EntityUtils; -import javax.net.ssl.SSLContext; - /** * @author qqmyers - * @param what it stores */ /* * Remote Overlay Driver * * StorageIdentifier format: - * ://// + * ://// + * + * baseUrl: http(s):// */ -public class RemoteOverlayAccessIO extends StorageIO { - - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.RemoteOverlayAccessIO"); - - private StorageIO baseStore = null; - private String urlPath = null; - private String baseUrl = null; +public class RemoteOverlayAccessIO extends AbstractRemoteOverlayAccessIO { - private static HttpClientContext localContext = HttpClientContext.create(); - private PoolingHttpClientConnectionManager cm = null; - CloseableHttpClient httpclient = null; - private int timeout = 1200; - private RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout * 1000) - .setConnectionRequestTimeout(timeout * 1000).setSocketTimeout(timeout * 1000) - .setCookieSpec(CookieSpecs.STANDARD).setExpectContinueEnabled(true).build(); - private static boolean trustCerts = false; - private int httpConcurrency = 4; + // A single baseUrl of the form http(s):// where this store can reference data + static final String BASE_URL = "base-url"; + String baseUrl = null; + public RemoteOverlayAccessIO() { + super(); + } + public RemoteOverlayAccessIO(T dvObject, DataAccessRequest req, String driverId) throws IOException { super(dvObject, req, driverId); this.setIsLocalFile(false); + configureRemoteEndpoints(); configureStores(req, driverId, null); logger.fine("Parsing storageidentifier: " + dvObject.getStorageIdentifier()); - urlPath = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); - validatePath(urlPath); - - logger.fine("Base URL: " + urlPath); + path = dvObject.getStorageIdentifier().substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + validatePath(path); + + logger.fine("Relative path: " + path); } public RemoteOverlayAccessIO(String storageLocation, String driverId) throws IOException { super(null, null, driverId); this.setIsLocalFile(false); + configureRemoteEndpoints(); configureStores(null, driverId, storageLocation); - urlPath = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); - validatePath(urlPath); - logger.fine("Base URL: " + urlPath); + path = storageLocation.substring(storageLocation.lastIndexOf("//") + 2); + validatePath(path); + logger.fine("Relative path: " + path); } - - private void validatePath(String path) throws IOException { + + protected void validatePath(String relPath) throws IOException { try { - URI absoluteURI = new URI(baseUrl + "/" + urlPath); - if(!absoluteURI.normalize().toString().startsWith(baseUrl)) { + URI absoluteURI = new URI(baseUrl + "/" + relPath); + if (!absoluteURI.normalize().toString().startsWith(baseUrl)) { throw new IOException("storageidentifier doesn't start with " + this.driverId + "'s base-url"); } - } catch(URISyntaxException use) { + } catch (URISyntaxException use) { throw new IOException("Could not interpret storageidentifier in remote store " + 
this.driverId); } - } - + } @Override public void open(DataAccessOption... options) throws IOException { @@ -150,10 +122,14 @@ public void open(DataAccessOption... options) throws IOException { this.setSize(dataFile.getFilesize()); } else { logger.fine("Setting size"); - this.setSize(getSizeFromHttpHeader()); + this.setSize(retrieveSizeFromMedia()); } - if (dataFile.getContentType() != null && dataFile.getContentType().equals("text/tab-separated-values") - && dataFile.isTabularData() && dataFile.getDataTable() != null && (!this.noVarHeader())) { + if (dataFile.getContentType() != null + && dataFile.getContentType().equals("text/tab-separated-values") + && dataFile.isTabularData() + && dataFile.getDataTable() != null + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); @@ -171,18 +147,17 @@ public void open(DataAccessOption... options) throws IOException { } } else if (dvObject instanceof Dataset) { throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); } else if (dvObject instanceof Dataverse) { throw new IOException( - "Data Access: RemoteOverlay Storage driver does not support dvObject type Dataverse yet"); - } else { - this.setSize(getSizeFromHttpHeader()); + "Data Access: " + this.getClass().getName() + " does not support dvObject type Dataverse yet"); } } - private long getSizeFromHttpHeader() { + @Override + public long retrieveSizeFromMedia() { long size = -1; - HttpHead head = new HttpHead(baseUrl + "/" + urlPath); + HttpHead head = new HttpHead(baseUrl + "/" + path); try { CloseableHttpResponse response = getSharedHttpClient().execute(head, localContext); @@ -224,12 +199,12 @@ public InputStream getInputStream() throws IOException { break; default: logger.warning("Response from " + get.getURI().toString() + " was " + code); - throw new IOException("Cannot retrieve: " + baseUrl + "/" + urlPath + " code: " + code); + throw new IOException("Cannot retrieve: " + baseUrl + "/" + path + " code: " + code); } } catch (Exception e) { logger.warning(e.getMessage()); e.printStackTrace(); - throw new IOException("Error retrieving: " + baseUrl + "/" + urlPath + " " + e.getMessage()); + throw new IOException("Error retrieving: " + baseUrl + "/" + path + " " + e.getMessage()); } setChannel(Channels.newChannel(super.getInputStream())); @@ -260,13 +235,13 @@ public void delete() throws IOException { throw new IOException("Direct Access IO must be used to permanently delete stored file objects"); } try { - HttpDelete del = new HttpDelete(baseUrl + "/" + urlPath); + HttpDelete del = new HttpDelete(baseUrl + "/" + path); CloseableHttpResponse response = getSharedHttpClient().execute(del, localContext); try { int code = response.getStatusLine().getStatusCode(); switch (code) { case 200: - logger.fine("Sent DELETE for " + baseUrl + "/" + urlPath); + logger.fine("Sent DELETE for " + baseUrl + "/" + path); default: logger.fine("Response from DELETE on " + del.getURI().toString() + " was " + code); } @@ -275,7 +250,7 @@ public void delete() throws IOException { } } catch (Exception e) { logger.warning(e.getMessage()); - throw new IOException("Error deleting: " + baseUrl + "/" + urlPath); + throw new IOException("Error deleting: " + baseUrl + "/" + path); } @@ -284,104 +259,12 @@ public 
void delete() throws IOException { } - @Override - public Channel openAuxChannel(String auxItemTag, DataAccessOption... options) throws IOException { - return baseStore.openAuxChannel(auxItemTag, options); - } - - @Override - public boolean isAuxObjectCached(String auxItemTag) throws IOException { - return baseStore.isAuxObjectCached(auxItemTag); - } - - @Override - public long getAuxObjectSize(String auxItemTag) throws IOException { - return baseStore.getAuxObjectSize(auxItemTag); - } - - @Override - public Path getAuxObjectAsPath(String auxItemTag) throws IOException { - return baseStore.getAuxObjectAsPath(auxItemTag); - } - - @Override - public void backupAsAux(String auxItemTag) throws IOException { - baseStore.backupAsAux(auxItemTag); - } - - @Override - public void revertBackupAsAux(String auxItemTag) throws IOException { - baseStore.revertBackupAsAux(auxItemTag); - } - - @Override - // this method copies a local filesystem Path into this DataAccess Auxiliary - // location: - public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException { - baseStore.savePathAsAux(fileSystemPath, auxItemTag); - } - - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag, filesize); - } - - /** - * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") - * @throws IOException if anything goes wrong. - */ - @Override - public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException { - baseStore.saveInputStreamAsAux(inputStream, auxItemTag); - } - - @Override - public List listAuxObjects() throws IOException { - return baseStore.listAuxObjects(); - } - - @Override - public void deleteAuxObject(String auxItemTag) throws IOException { - baseStore.deleteAuxObject(auxItemTag); - } - - @Override - public void deleteAllAuxObjects() throws IOException { - baseStore.deleteAllAuxObjects(); - } - - @Override - public String getStorageLocation() throws IOException { - String fullStorageLocation = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStorageLocation); - int driverIndex = fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStorageLocation = fullStorageLocation.substring(fullStorageLocation.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - if (this.getDvObject() instanceof Dataset) { - throw new IOException("RemoteOverlayAccessIO: Datasets are not a supported dvObject"); - } else if (this.getDvObject() instanceof DataFile) { - fullStorageLocation = StorageIO.getDriverPrefix(this.driverId) + fullStorageLocation; - } else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStorageLocation: " + fullStorageLocation); - return fullStorageLocation; - } - @Override public Path getFileSystemPath() throws UnsupportedDataAccessOperationException { throw new UnsupportedDataAccessOperationException( "RemoteOverlayAccessIO: this is a remote DataAccess IO object, it has no local filesystem path associated with it."); } - @Override - public boolean exists() { - logger.fine("Exists called"); - return (getSizeFromHttpHeader() != -1); - } - @Override public WritableByteChannel getWriteChannel() throws UnsupportedDataAccessOperationException { throw new UnsupportedDataAccessOperationException( @@ 
-394,20 +277,15 @@ public OutputStream getOutputStream() throws UnsupportedDataAccessOperationExcep "RemoteOverlayAccessIO: there are no output Streams associated with S3 objects."); } - @Override - public InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException { - return baseStore.getAuxFileAsInputStream(auxItemTag); - } - @Override public boolean downloadRedirectEnabled() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect"); + String optionValue = getConfigParam(StorageIO.DOWNLOAD_REDIRECT); if ("true".equalsIgnoreCase(optionValue)) { return true; } return false; } - + public boolean downloadRedirectEnabled(String auxObjectTag) { return baseStore.downloadRedirectEnabled(auxObjectTag); } @@ -418,11 +296,11 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary // ToDo - support remote auxiliary Files if (auxiliaryTag == null) { - String secretKey = System.getProperty("dataverse.files." + this.driverId + ".secret-key"); + String secretKey = getConfigParam(SECRET_KEY); if (secretKey == null) { - return baseUrl + "/" + urlPath; + return baseUrl + "/" + path; } else { - return UrlSignerUtil.signUrl(baseUrl + "/" + urlPath, getUrlExpirationMinutes(), null, "GET", + return UrlSignerUtil.signUrl(baseUrl + "/" + path, getUrlExpirationMinutes(), null, "GET", secretKey); } } else { @@ -430,27 +308,21 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary } } - int getUrlExpirationMinutes() { - String optionValue = System.getProperty("dataverse.files." + this.driverId + ".url-expiration-minutes"); - if (optionValue != null) { - Integer num; - try { - num = Integer.parseInt(optionValue); - } catch (NumberFormatException ex) { - num = null; - } - if (num != null) { - return num; - } - } - return 60; - } - private void configureStores(DataAccessRequest req, String driverId, String storageLocation) throws IOException { - baseUrl = System.getProperty("dataverse.files." + this.driverId + ".base-url"); + /** This endpoint configures all the endpoints the store is allowed to reference data from. At present, the RemoteOverlayAccessIO only supports a single endpoint but + * the derived GlobusOverlayAccessIO can support multiple endpoints. + * @throws IOException + */ + protected void configureRemoteEndpoints() throws IOException { + baseUrl = getConfigParam(BASE_URL); if (baseUrl == null) { - throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); - } else { + //Will accept the first endpoint using the newer setting + baseUrl = getConfigParam(REFERENCE_ENDPOINTS_WITH_BASEPATHS).split("\\s*,\\s*")[0]; + if (baseUrl == null) { + throw new IOException("dataverse.files." + this.driverId + ".base-url is required"); + } + } + if (baseUrl != null) { try { new URI(baseUrl); } catch (Exception e) { @@ -460,180 +332,42 @@ private void configureStores(DataAccessRequest req, String driverId, String stor } } - - if (baseStore == null) { - String baseDriverId = getBaseStoreIdFor(driverId); - String fullStorageLocation = null; - String baseDriverType = System.getProperty("dataverse.files." 
+ baseDriverId + ".type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER); - - if(dvObject instanceof Dataset) { - baseStore = DataAccess.getStorageIO(dvObject, req, baseDriverId); - } else { - if (this.getDvObject() != null) { - fullStorageLocation = getStoragePath(); - - // S3 expects :/// - switch (baseDriverType) { - case DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; - break; - default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " - + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not implemented"); - } - - } else if (storageLocation != null) { - // ://// - //remoteDriverId:// is removed if coming through directStorageIO - int index = storageLocation.indexOf(DataAccess.SEPARATOR); - if(index > 0) { - storageLocation = storageLocation.substring(index + DataAccess.SEPARATOR.length()); - } - //THe base store needs the baseStoreIdentifier and not the relative URL - fullStorageLocation = storageLocation.substring(0, storageLocation.indexOf("//")); - - switch (baseDriverType) { - case DataAccess.S3: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".bucket-name") + "/" - + fullStorageLocation; - break; - case DataAccess.FILE: - fullStorageLocation = baseDriverId + DataAccess.SEPARATOR - + System.getProperty("dataverse.files." + baseDriverId + ".directory", "/tmp/files") + "/" - + fullStorageLocation; - break; - default: - logger.warning("Not Implemented: RemoteOverlay store with base store type: " - + System.getProperty("dataverse.files." + baseDriverId + ".type")); - throw new IOException("Not implemented"); - } - } - baseStore = DataAccess.getDirectStorageIO(fullStorageLocation); - } - if (baseDriverType.contentEquals(DataAccess.S3)) { - ((S3AccessIO) baseStore).setMainDriver(false); - } - } - remoteStoreName = System.getProperty("dataverse.files." + this.driverId + ".remote-store-name"); - try { - remoteStoreUrl = new URL(System.getProperty("dataverse.files." + this.driverId + ".remote-store-url")); - } catch(MalformedURLException mfue) { - logger.fine("Unable to read remoteStoreUrl for driver: " + this.driverId); - } - } - - //Convenience method to assemble the path, starting with the DOI authority/identifier/, that is needed to create a base store via DataAccess.getDirectStorageIO - the caller has to add the store type specific prefix required. 
- private String getStoragePath() throws IOException { - String fullStoragePath = dvObject.getStorageIdentifier(); - logger.fine("storageidentifier: " + fullStoragePath); - int driverIndex = fullStoragePath.lastIndexOf(DataAccess.SEPARATOR); - if(driverIndex >=0) { - fullStoragePath = fullStoragePath.substring(fullStoragePath.lastIndexOf(DataAccess.SEPARATOR) + DataAccess.SEPARATOR.length()); - } - int suffixIndex = fullStoragePath.indexOf("//"); - if(suffixIndex >=0) { - fullStoragePath = fullStoragePath.substring(0, suffixIndex); - } - if (this.getDvObject() instanceof Dataset) { - fullStoragePath = this.getDataset().getAuthorityForFileStorage() + "/" - + this.getDataset().getIdentifierForFileStorage() + "/" + fullStoragePath; - } else if (this.getDvObject() instanceof DataFile) { - fullStoragePath = this.getDataFile().getOwner().getAuthorityForFileStorage() + "/" - + this.getDataFile().getOwner().getIdentifierForFileStorage() + "/" + fullStoragePath; - }else if (dvObject instanceof Dataverse) { - throw new IOException("RemoteOverlayAccessIO: Dataverses are not a supported dvObject"); - } - logger.fine("fullStoragePath: " + fullStoragePath); - return fullStoragePath; - } - - public CloseableHttpClient getSharedHttpClient() { - if (httpclient == null) { - try { - initHttpPool(); - httpclient = HttpClients.custom().setConnectionManager(cm).setDefaultRequestConfig(config).build(); - - } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { - logger.warning(ex.getMessage()); - } - } - return httpclient; - } - - private void initHttpPool() throws NoSuchAlgorithmException, KeyManagementException, KeyStoreException { - if (trustCerts) { - // use the TrustSelfSignedStrategy to allow Self Signed Certificates - SSLContext sslContext; - SSLConnectionSocketFactory connectionFactory; - - sslContext = SSLContextBuilder.create().loadTrustMaterial(new TrustAllStrategy()).build(); - // create an SSL Socket Factory to use the SSLContext with the trust self signed - // certificate strategy - // and allow all hosts verifier. - connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); - - Registry registry = RegistryBuilder.create() - .register("https", connectionFactory).build(); - cm = new PoolingHttpClientConnectionManager(registry); - } else { - cm = new PoolingHttpClientConnectionManager(); - } - cm.setDefaultMaxPerRoute(httpConcurrency); - cm.setMaxTotal(httpConcurrency > 20 ? 
     @Override
     public void savePath(Path fileSystemPath) throws IOException {
         throw new UnsupportedDataAccessOperationException(
-                "RemoteOverlayAccessIO: savePath() not implemented in this storage driver.");
+                this.getClass().getName() + ": savePath() not implemented in this storage driver.");
     }
 
     @Override
     public void saveInputStream(InputStream inputStream) throws IOException {
         throw new UnsupportedDataAccessOperationException(
-                "RemoteOverlayAccessIO: saveInputStream() not implemented in this storage driver.");
+                this.getClass().getName() + ": saveInputStream() not implemented in this storage driver.");
     }
 
     @Override
     public void saveInputStream(InputStream inputStream, Long filesize) throws IOException {
         throw new UnsupportedDataAccessOperationException(
-                "RemoteOverlayAccessIO: saveInputStream(InputStream, Long) not implemented in this storage driver.");
+                this.getClass().getName() + ": saveInputStream(InputStream, Long) not implemented in this storage driver.");
     }
 
-    protected static boolean isValidIdentifier(String driverId, String storageId) {
+    static boolean isValidIdentifier(String driverId, String storageId) {
         String urlPath = storageId.substring(storageId.lastIndexOf("//") + 2);
-        String baseUrl = System.getProperty("dataverse.files." + driverId + ".base-url");
+        String baseUrl = getConfigParamForDriver(driverId, BASE_URL);
         try {
             URI absoluteURI = new URI(baseUrl + "/" + urlPath);
-            if(!absoluteURI.normalize().toString().startsWith(baseUrl)) {
+            if (!absoluteURI.normalize().toString().startsWith(baseUrl)) {
                 logger.warning("storageidentifier doesn't start with " + driverId + "'s base-url: " + storageId);
                 return false;
             }
-        } catch(URISyntaxException use) {
+        } catch (URISyntaxException use) {
             logger.warning("Could not interpret storageidentifier in remote store " + driverId + " : " + storageId);
             return false;
         }
         return true;
     }
-
-    public static String getBaseStoreIdFor(String driverId) {
-        return System.getProperty("dataverse.files." + driverId + ".base-store");
-    }
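The isValidIdentifier() check above is what blocks storage identifiers that would escape the store's base-url: normalizing the assembled URI collapses any ../ segments before the prefix comparison. A small self-contained illustration of why the normalize() step matters (the base URL is hypothetical):

    import java.net.URI;
    import java.net.URISyntaxException;

    public class IdentifierCheckSketch {
        static boolean underBase(String baseUrl, String urlPath) throws URISyntaxException {
            URI absolute = new URI(baseUrl + "/" + urlPath);
            // normalize() folds "<segment>/.." pairs, exposing attempts to climb out
            return absolute.normalize().toString().startsWith(baseUrl);
        }

        public static void main(String[] args) throws URISyntaxException {
            String base = "https://files.example.org/data"; // hypothetical base-url
            System.out.println(underBase(base, "10.5072/FK2/file.txt")); // true
            System.out.println(underBase(base, "../../etc/passwd"));     // false
        }
    }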
+ driverId + ".base-store"); - } - - @Override - public List cleanUp(Predicate filter, boolean dryRun) throws IOException { - return baseStore.cleanUp(filter, dryRun); - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index e3c6bfede7c..d2fdec7b323 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -4,6 +4,7 @@ import com.amazonaws.ClientConfiguration; import com.amazonaws.HttpMethod; import com.amazonaws.SdkClientException; +import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.AWSCredentialsProviderChain; import com.amazonaws.auth.AWSStaticCredentialsProvider; import com.amazonaws.auth.BasicAWSCredentials; @@ -39,6 +40,7 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.FileUtil; import opennlp.tools.util.StringUtil; @@ -57,9 +59,11 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; +import java.util.Optional; import java.util.Random; import java.util.function.Predicate; import java.util.logging.Logger; @@ -88,6 +92,16 @@ public class S3AccessIO extends StorageIO { private static final Config config = ConfigProvider.getConfig(); private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.S3AccessIO"); + static final String URL_EXPIRATION_MINUTES = "url-expiration-minutes"; + static final String CUSTOM_ENDPOINT_URL = "custom-endpoint-url"; + static final String PROXY_URL = "proxy-url"; + static final String BUCKET_NAME = "bucket-name"; + static final String MIN_PART_SIZE = "min-part-size"; + static final String CUSTOM_ENDPOINT_REGION = "custom-endpoint-region"; + static final String PATH_STYLE_ACCESS = "path-style-access"; + static final String PAYLOAD_SIGNING = "payload-signing"; + static final String CHUNKED_ENCODING = "chunked-encoding"; + static final String PROFILE = "profile"; private boolean mainDriver = true; @@ -103,19 +117,18 @@ public S3AccessIO(T dvObject, DataAccessRequest req, String driverId) { minPartSize = getMinPartSize(driverId); s3=getClient(driverId); tm=getTransferManager(driverId); - endpoint = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url", ""); - proxy = System.getProperty("dataverse.files." + driverId + ".proxy-url", ""); + endpoint = getConfigParam(CUSTOM_ENDPOINT_URL, ""); + proxy = getConfigParam(PROXY_URL, ""); if(!StringUtil.isEmpty(proxy)&&StringUtil.isEmpty(endpoint)) { logger.severe(driverId + " config error: Must specify a custom-endpoint-url if proxy-url is specified"); } - //Not sure this is needed but moving it from the open method for now since it definitely doesn't need to run every time an object is opened. - try { - if (bucketName == null || !s3.doesBucketExistV2(bucketName)) { - throw new IOException("ERROR: S3AccessIO - You must create and configure a bucket before creating datasets."); - } - } catch (SdkClientException sce) { - throw new IOException("ERROR: S3AccessIO - Failed to look up bucket "+bucketName+" (is AWS properly configured?): " + sce.getMessage()); - } + + // FWIW: There used to be a check here to see if the bucket exists. 
+            // It was very redundant (checking every time we access any file) and didn't do
+            // much but potentially make the failure (in the unlikely case a bucket doesn't
+            // exist/just disappeared) happen slightly earlier (here versus at the first
+            // file/metadata access).
+
         } catch (Exception e) {
             throw new AmazonClientException(
                         "Cannot instantiate a S3 client; check your AWS credentials and region",
@@ -207,20 +220,14 @@ public void open(DataAccessOption... options) throws IOException {
 
         if (isReadAccess) {
-            key = getMainFileKey();
-            ObjectMetadata objectMetadata = null;
-            try {
-                objectMetadata = s3.getObjectMetadata(bucketName, key);
-            } catch (SdkClientException sce) {
-                throw new IOException("Cannot get S3 object " + key + " ("+sce.getMessage()+")");
-            }
-            this.setSize(objectMetadata.getContentLength());
+            this.setSize(retrieveSizeFromMedia());
 
             if (dataFile.getContentType() != null
                     && dataFile.getContentType().equals("text/tab-separated-values")
                     && dataFile.isTabularData()
                     && dataFile.getDataTable() != null
-                    && (!this.noVarHeader())) {
+                    && (!this.noVarHeader())
+                    && (!dataFile.getDataTable().isStoredWithVariableHeader())) {
 
                 List<DataVariable> datavariables = dataFile.getDataTable().getDataVariables();
                 String varHeaderLine = generateVariableHeader(datavariables);
@@ -849,7 +856,7 @@ private static String getMainFileKey(String baseKey, String storageIdentifier, S
 
     @Override
     public boolean downloadRedirectEnabled() {
-        String optionValue = System.getProperty("dataverse.files." + this.driverId + ".download-redirect");
+        String optionValue = getConfigParam(DOWNLOAD_REDIRECT);
         if ("true".equalsIgnoreCase(optionValue)) {
             return true;
         }
@@ -985,7 +992,10 @@ private String generateTemporaryS3UploadUrl(String key, Date expiration) throws
         GeneratePresignedUrlRequest generatePresignedUrlRequest =
                 new GeneratePresignedUrlRequest(bucketName, key).withMethod(HttpMethod.PUT).withExpiration(expiration);
         //Require user to add this header to indicate a temporary file
-        generatePresignedUrlRequest.putCustomRequestHeader(Headers.S3_TAGGING, "dv-state=temp");
+        final boolean taggingDisabled = JvmSettings.DISABLE_S3_TAGGING.lookupOptional(Boolean.class, this.driverId).orElse(false);
+        if (!taggingDisabled) {
+            generatePresignedUrlRequest.putCustomRequestHeader(Headers.S3_TAGGING, "dv-state=temp");
+        }
 
         URL presignedUrl;
         try {
@@ -1034,7 +1044,10 @@ public JsonObjectBuilder generateTemporaryS3UploadUrls(String globalId, String s
         } else {
             JsonObjectBuilder urls = Json.createObjectBuilder();
             InitiateMultipartUploadRequest initiationRequest = new InitiateMultipartUploadRequest(bucketName, key);
-            initiationRequest.putCustomRequestHeader(Headers.S3_TAGGING, "dv-state=temp");
+            final boolean taggingDisabled = JvmSettings.DISABLE_S3_TAGGING.lookupOptional(Boolean.class, this.driverId).orElse(false);
+            if (!taggingDisabled) {
+                initiationRequest.putCustomRequestHeader(Headers.S3_TAGGING, "dv-state=temp");
+            }
             InitiateMultipartUploadResult initiationResponse = s3.initiateMultipartUpload(initiationRequest);
             String uploadId = initiationResponse.getUploadId();
             for (int i = 1; i <= (fileSize / minPartSize) + (fileSize % minPartSize > 0 ? 1 : 0); i++) {
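Both presigned-upload paths above now consult the same switch before tagging the object as temporary, since some S3-compatible backends reject the x-amz-tagging header. The repeated pattern, condensed into one hedged sketch that reuses the changeset's own JvmSettings.DISABLE_S3_TAGGING setting (the helper name is hypothetical):

    // Hypothetical helper illustrating the repeated pattern above.
    static void maybeTagAsTemp(GeneratePresignedUrlRequest request, String driverId) {
        final boolean taggingDisabled = JvmSettings.DISABLE_S3_TAGGING
                .lookupOptional(Boolean.class, driverId).orElse(false);
        if (!taggingDisabled) {
            // Mark the upload as temporary so server-side cleanup can find it later.
            request.putCustomRequestHeader(Headers.S3_TAGGING, "dv-state=temp");
        }
    }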
+ this.driverId + ".url-expiration-minutes"); + String optionValue = getConfigParam(URL_EXPIRATION_MINUTES); if (optionValue != null) { Integer num; try { @@ -1089,7 +1102,7 @@ int getUrlExpirationMinutes() { } private static String getBucketName(String driverId) { - return System.getProperty("dataverse.files." + driverId + ".bucket-name"); + return getConfigParamForDriver(driverId, BUCKET_NAME); } private static long getMinPartSize(String driverId) { @@ -1097,7 +1110,7 @@ private static long getMinPartSize(String driverId) { // (minimum allowed is 5*1024**2 but it probably isn't worth the complexity starting at ~5MB. Also - confirmed that they use base 2 definitions) long min = 5 * 1024 * 1024l; - String partLength = System.getProperty("dataverse.files." + driverId + ".min-part-size"); + String partLength = getConfigParamForDriver(driverId, MIN_PART_SIZE); try { if (partLength != null) { long val = Long.parseLong(partLength); @@ -1146,12 +1159,12 @@ private static AmazonS3 getClient(String driverId) { * Pass in a URL pointing to your S3 compatible storage. * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html */ - String s3CEUrl = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-url", ""); + String s3CEUrl = getConfigParamForDriver(driverId, CUSTOM_ENDPOINT_URL, ""); /** * Pass in a region to use for SigV4 signing of requests. * Defaults to "dataverse" as it is not relevant for custom S3 implementations. */ - String s3CERegion = System.getProperty("dataverse.files." + driverId + ".custom-endpoint-region", "dataverse"); + String s3CERegion = getConfigParamForDriver(driverId, CUSTOM_ENDPOINT_REGION, "dataverse"); // if the admin has set a system property (see below) we use this endpoint URL instead of the standard ones. if (!s3CEUrl.isEmpty()) { @@ -1161,7 +1174,7 @@ private static AmazonS3 getClient(String driverId) { * Pass in a boolean value if path style access should be used within the S3 client. * Anything but case-insensitive "true" will lead to value of false, which is default value, too. */ - Boolean s3pathStyleAccess = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".path-style-access", "false")); + Boolean s3pathStyleAccess = Boolean.parseBoolean(getConfigParamForDriver(driverId, PATH_STYLE_ACCESS, "false")); // some custom S3 implementations require "PathStyleAccess" as they us a path, not a subdomain. default = false s3CB.withPathStyleAccessEnabled(s3pathStyleAccess); @@ -1169,41 +1182,70 @@ private static AmazonS3 getClient(String driverId) { * Pass in a boolean value if payload signing should be used within the S3 client. * Anything but case-insensitive "true" will lead to value of false, which is default value, too. */ - Boolean s3payloadSigning = Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".payload-signing","false")); + Boolean s3payloadSigning = Boolean.parseBoolean(getConfigParamForDriver(driverId, PAYLOAD_SIGNING,"false")); /** * Pass in a boolean value if chunked encoding should not be used within the S3 client. * Anything but case-insensitive "false" will lead to value of true, which is default value, too. */ - Boolean s3chunkedEncoding = Boolean.parseBoolean(System.getProperty("dataverse.files." 
+ driverId + ".chunked-encoding","true")); + Boolean s3chunkedEncoding = Boolean.parseBoolean(getConfigParamForDriver(driverId, CHUNKED_ENCODING,"true")); // Openstack SWIFT S3 implementations require "PayloadSigning" set to true. default = false s3CB.setPayloadSigningEnabled(s3payloadSigning); // Openstack SWIFT S3 implementations require "ChunkedEncoding" set to false. default = true // Boolean is inverted, otherwise setting dataverse.files..chunked-encoding=false would result in leaving Chunked Encoding enabled s3CB.setChunkedEncodingDisabled(!s3chunkedEncoding); - /** - * Pass in a string value if this storage driver should use a non-default AWS S3 profile. - * The default is "default" which should work when only one profile exists. + /** Configure credentials for the S3 client. There are multiple mechanisms available. + * Role-based/instance credentials are globally defined while the other mechanisms (profile, static) + * are defined per store. The logic below assures that + * * if a store specific profile or static credentials are explicitly set, they will be used in preference to the global role-based credentials. + * * if a store specific role-based credentials are explicitly set, they will be used in preference to the global instance credentials, + * * if a profile and static credentials are both explicitly set, the profile will be used preferentially, and + * * if no store-specific credentials are set, the global credentials will be preferred over using any "default" profile credentials that are found. */ - String s3profile = System.getProperty("dataverse.files." + driverId + ".profile","default"); - ProfileCredentialsProvider profileCredentials = new ProfileCredentialsProvider(s3profile); - - // Try to retrieve credentials via Microprofile Config API, too. For production use, you should not use env - // vars or system properties to provide these, but use the secrets config source provided by Payara. - AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( - new BasicAWSCredentials( - config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class).orElse(""), - config.getOptionalValue("dataverse.files." + driverId + ".secret-key", String.class).orElse("") - )); - - //Add role-based provider as in the default provider chain - InstanceProfileCredentialsProvider instanceCredentials = InstanceProfileCredentialsProvider.getInstance(); + + ArrayList providers = new ArrayList<>(); + + String s3profile = getConfigParamForDriver(driverId, PROFILE); + boolean allowInstanceCredentials = true; + // Assume that instance credentials should not be used if the profile is + // actually set for this store or if static creds are provided (below). + if (s3profile != null) { + allowInstanceCredentials = false; + } + // Try to retrieve credentials via Microprofile Config API, too. For production + // use, you should not use env vars or system properties to provide these, but + // use the secrets config source provided by Payara. + Optional accessKey = config.getOptionalValue("dataverse.files." + driverId + ".access-key", String.class); + Optional secretKey = config.getOptionalValue("dataverse.files." 
+ driverId + ".secret-key", String.class); + if (accessKey.isPresent() && secretKey.isPresent()) { + allowInstanceCredentials = false; + AWSStaticCredentialsProvider staticCredentials = new AWSStaticCredentialsProvider( + new BasicAWSCredentials( + accessKey.get(), + secretKey.get())); + providers.add(staticCredentials); + } else if (s3profile == null) { + //Only use the default profile when it isn't explicitly set for this store when there are no static creds (otherwise it will be preferred). + s3profile = "default"; + } + if (s3profile != null) { + providers.add(new ProfileCredentialsProvider(s3profile)); + } + + if (allowInstanceCredentials) { + // Add role-based provider as in the default provider chain + providers.add(InstanceProfileCredentialsProvider.getInstance()); + } // Add all providers to chain - the first working provider will be used - // (role-based is first in the default cred provider chain, so we're just + // (role-based is first in the default cred provider chain (if no profile or + // static creds are explicitly set for the store), so we're just // reproducing that, then profile, then static credentials as the fallback) - AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(instanceCredentials, profileCredentials, staticCredentials); + + // As the order is the reverse of how we added providers, we reverse the list here + Collections.reverse(providers); + AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(providers); s3CB.setCredentials(providerChain); - + // let's build the client :-) AmazonS3 client = s3CB.build(); driverClientMap.put(driverId, client); @@ -1385,4 +1427,20 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE } return toDelete; } + + @Override + public long retrieveSizeFromMedia() throws IOException { + key = getMainFileKey(); + ObjectMetadata objectMetadata = null; + try { + objectMetadata = s3.getObjectMetadata(bucketName, key); + } catch (SdkClientException sce) { + throw new IOException("Cannot get S3 object " + key + " (" + sce.getMessage() + ")"); + } + return objectMetadata.getContentLength(); + } + + public static String getNewIdentifier(String driverId) { + return driverId + DataAccess.SEPARATOR + getConfigParamForDriver(driverId, BUCKET_NAME) + ":" + FileUtil.generateStorageIdentifier(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java index d33f8f5e5bd..8d3efa79b51 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/StorageIO.java @@ -20,12 +20,12 @@ package edu.harvard.iq.dataverse.dataaccess; - import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.datavariable.DataVariable; +import edu.harvard.iq.dataverse.util.FileUtil; import java.io.IOException; import java.io.InputStream; @@ -43,7 +43,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; - /** * * @author Leonid Andreev @@ -52,18 +51,26 @@ public abstract class StorageIO { + static final String INGEST_SIZE_LIMIT = "ingestsizelimit"; + static final String PUBLIC = "public"; + static final String TYPE = "type"; + static final String UPLOAD_REDIRECT = "upload-redirect"; + static final String UPLOAD_OUT_OF_BAND = "upload-out-of-band"; + protected static final String DOWNLOAD_REDIRECT = 
"download-redirect"; + + public StorageIO() { } - + public StorageIO(String storageLocation, String driverId) { - this.driverId=driverId; + this.driverId = driverId; } public StorageIO(T dvObject, DataAccessRequest req, String driverId) { this.dvObject = dvObject; this.req = req; - this.driverId=driverId; + this.driverId = driverId; if (this.req == null) { this.req = new DataAccessRequest(); } @@ -72,18 +79,19 @@ public StorageIO(T dvObject, DataAccessRequest req, String driverId) { } } - - // Abstract methods to be implemented by the storage drivers: public abstract void open(DataAccessOption... option) throws IOException; protected boolean isReadAccess = false; protected boolean isWriteAccess = false; - //A public store is one in which files may be accessible outside Dataverse and therefore accessible without regard to Dataverse's access controls related to restriction and embargoes. - //Currently, this is just used to warn users at upload time rather than disable restriction/embargo. + // A public store is one in which files may be accessible outside Dataverse and + // therefore accessible without regard to Dataverse's access controls related to + // restriction and embargoes. + // Currently, this is just used to warn users at upload time rather than disable + // restriction/embargo. static protected Map driverPublicAccessMap = new HashMap(); - + public boolean canRead() { return isReadAccess; } @@ -94,115 +102,118 @@ public boolean canWrite() { public abstract String getStorageLocation() throws IOException; - // This method will return a Path, if the storage method is a - // local filesystem. Otherwise should throw an IOException. + // This method will return a Path, if the storage method is a + // local filesystem. Otherwise should throw an IOException. public abstract Path getFileSystemPath() throws IOException; - - public abstract boolean exists() throws IOException; - + + public abstract boolean exists() throws IOException; + public abstract void delete() throws IOException; - + // this method for copies a local Path (for ex., a // temp file, into this DataAccess location): public abstract void savePath(Path fileSystemPath) throws IOException; - + // same, for an InputStream: /** - * This method copies a local InputStream into this DataAccess location. - * Note that the S3 driver implementation of this abstract method is problematic, - * because S3 cannot save an object of an unknown length. This effectively - * nullifies any benefits of streaming; as we cannot start saving until we - * have read the entire stream. - * One way of solving this would be to buffer the entire stream as byte[], - * in memory, then save it... Which of course would be limited by the amount - * of memory available, and thus would not work for streams larger than that. - * So we have eventually decided to save save the stream to a temp file, then - * save to S3. This is slower, but guaranteed to work on any size stream. - * An alternative we may want to consider is to not implement this method - * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException, - * similarly to how we handle attempts to open OutputStreams, in this and the - * Swift driver. - * (Not an issue in either FileAccessIO or SwiftAccessIO implementations) + * This method copies a local InputStream into this DataAccess location. Note + * that the S3 driver implementation of this abstract method is problematic, + * because S3 cannot save an object of an unknown length. 
+     * nullifies any benefits of streaming; as we cannot start saving until we have
+     * read the entire stream. One way of solving this would be to buffer the entire
+     * stream as byte[], in memory, then save it... Which of course would be limited
+     * by the amount of memory available, and thus would not work for streams larger
+     * than that. So we have eventually decided to save the stream to a temp
+     * file, then save to S3. This is slower, but guaranteed to work on any size
+     * stream. An alternative we may want to consider is to not implement this
+     * method in the S3 driver, and make it throw the
+     * UnsupportedDataAccessOperationException, similarly to how we handle attempts
+     * to open OutputStreams, in this and the Swift driver. (Not an issue in either
+     * FileAccessIO or SwiftAccessIO implementations)
      *
      * @param inputStream InputStream we want to save
-     * @param auxItemTag String representing this Auxiliary type ("extension") 
+     * @param auxItemTag  String representing this Auxiliary type ("extension")
      * @throws IOException if anything goes wrong.
-     */ 
+     */
     public abstract void saveInputStream(InputStream inputStream) throws IOException;
+
     public abstract void saveInputStream(InputStream inputStream, Long filesize) throws IOException;
-    
+
     // Auxiliary File Management: (new as of 4.0.2!)
-    
+
     // An "auxiliary object" is an abstraction of the traditional DVN/Dataverse
-    // mechanism of storing extra files related to the man StudyFile/DataFile -
-    // such as "saved original" and cached format conversions for tabular files,
-    // thumbnails for images, etc. - in physical files with the same file
-    // name but various reserved extensions.
-
-    //This function retrieves auxiliary files related to datasets, and returns them as inputstream
-    public abstract InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException ;
-
+    // mechanism of storing extra files related to the main StudyFile/DataFile -
+    // such as "saved original" and cached format conversions for tabular files,
+    // thumbnails for images, etc. - in physical files with the same file
+    // name but various reserved extensions.
+
+    // This function retrieves auxiliary files related to datasets, and returns them
+    // as inputstream
+    public abstract InputStream getAuxFileAsInputStream(String auxItemTag) throws IOException;
+
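As the comment block above explains, an auxiliary object piggybacks on the main file's stored name plus a reserved tag. A hedged usage sketch (the tag value and temp path are illustrative only):

    // Cache a derivative (e.g. a thumbnail) next to the main file under an aux tag,
    // then read it back.
    static void cacheThumbnail(DataFile dataFile) throws Exception {
        StorageIO<DataFile> io = dataFile.getStorageIO();
        io.savePathAsAux(java.nio.file.Paths.get("/tmp/thumb400.png"), "thumb400");
        if (io.isAuxObjectCached("thumb400")) {
            try (java.io.InputStream in = io.getAuxFileAsInputStream("thumb400")) {
                // serve or post-process the cached derivative
            }
        }
    }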
     public abstract Channel openAuxChannel(String auxItemTag, DataAccessOption... option) throws IOException;
-
-    public abstract long getAuxObjectSize(String auxItemTag) throws IOException; 
-
-    public abstract Path getAuxObjectAsPath(String auxItemTag) throws IOException; 
-
-    public abstract boolean isAuxObjectCached(String auxItemTag) throws IOException; 
-
-    public abstract void backupAsAux(String auxItemTag) throws IOException; 
-
-    public abstract void revertBackupAsAux(String auxItemTag) throws IOException; 
-
-    // this method copies a local filesystem Path into this DataAccess Auxiliary location:
+
+    public abstract long getAuxObjectSize(String auxItemTag) throws IOException;
+
+    public abstract Path getAuxObjectAsPath(String auxItemTag) throws IOException;
+
+    public abstract boolean isAuxObjectCached(String auxItemTag) throws IOException;
+
+    public abstract void backupAsAux(String auxItemTag) throws IOException;
+
+    public abstract void revertBackupAsAux(String auxItemTag) throws IOException;
+
+    // this method copies a local filesystem Path into this DataAccess Auxiliary
+    // location:
     public abstract void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOException;
-    
+
     /**
-     * This method copies a local InputStream into this DataAccess Auxiliary location.
-     * Note that the S3 driver implementation of this abstract method is problematic,
-     * because S3 cannot save an object of an unknown length. This effectively
-     * nullifies any benefits of streaming; as we cannot start saving until we
-     * have read the entire stream.
-     * One way of solving this would be to buffer the entire stream as byte[],
-     * in memory, then save it... Which of course would be limited by the amount
-     * of memory available, and thus would not work for streams larger than that.
-     * So we have eventually decided to save save the stream to a temp file, then
-     * save to S3. This is slower, but guaranteed to work on any size stream.
-     * An alternative we may want to consider is to not implement this method
-     * in the S3 driver, and make it throw the UnsupportedDataAccessOperationException,
-     * similarly to how we handle attempts to open OutputStreams, in this and the
-     * Swift driver.
-     * (Not an issue in either FileAccessIO or SwiftAccessIO implementations)
+     * This method copies a local InputStream into this DataAccess Auxiliary
+     * location. Note that the S3 driver implementation of this abstract method is
+     * problematic, because S3 cannot save an object of an unknown length. This
+     * effectively nullifies any benefits of streaming; as we cannot start saving
+     * until we have read the entire stream. One way of solving this would be to
+     * buffer the entire stream as byte[], in memory, then save it... Which of
+     * course would be limited by the amount of memory available, and thus would not
+     * work for streams larger than that. So we have eventually decided to save
+     * the stream to a temp file, then save to S3. This is slower, but guaranteed to
+     * work on any size stream. An alternative we may want to consider is to not
+     * implement this method in the S3 driver, and make it throw the
+     * UnsupportedDataAccessOperationException, similarly to how we handle attempts
+     * to open OutputStreams, in this and the Swift driver. (Not an issue in either
+     * FileAccessIO or SwiftAccessIO implementations)
      *
      * @param inputStream InputStream we want to save
-     * @param auxItemTag String representing this Auxiliary type ("extension")
+     * @param auxItemTag  String representing this Auxiliary type ("extension")
      * @throws IOException if anything goes wrong.
-     */ 
-    public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException; 
-    public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize) throws IOException; 
-
-    public abstract List<String>listAuxObjects() throws IOException;
-    
-    public abstract void deleteAuxObject(String auxItemTag) throws IOException;
-    
+     */
+    public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag) throws IOException;
+
+    public abstract void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Long filesize)
+            throws IOException;
+
+    public abstract List<String> listAuxObjects() throws IOException;
+
+    public abstract void deleteAuxObject(String auxItemTag) throws IOException;
+
     public abstract void deleteAllAuxObjects() throws IOException;
 
     private DataAccessRequest req;
     private InputStream in = null;
-    private OutputStream out; 
+    private OutputStream out;
 
     protected Channel channel;
     protected DvObject dvObject;
     protected String driverId;
 
-    /*private int status;*/
+    /* private int status; */
 
     private long size;
 
     /**
-     * Where in the file to seek to when reading (default is zero bytes, the
-     * start of the file).
+     * Where in the file to seek to when reading (default is zero bytes, the start
+     * of the file).
      */
     private long offset;
-    
+
     private String mimeType;
     private String fileName;
     private String varHeader;
@@ -215,8 +226,8 @@ public boolean canWrite() {
     private String swiftContainerName;
 
     private boolean isLocalFile = false;
-    /*private boolean isRemoteAccess = false;*/
-    /*private boolean isHttpAccess = false;*/
+    /* private boolean isRemoteAccess = false; */
+    /* private boolean isHttpAccess = false; */
     private boolean noVarHeader = false;
 
     // For remote downloads:
@@ -229,13 +240,14 @@ public boolean canWrite() {
     private String remoteUrl;
     protected String remoteStoreName = null;
     protected URL remoteStoreUrl = null;
-    
+
     // For HTTP-based downloads:
-    /*private GetMethod method = null;
-    private Header[] responseHeaders;*/
+    /*
+     * private GetMethod method = null; private Header[] responseHeaders;
+     */
 
     // getters:
-    
+
     public Channel getChannel() throws IOException {
         return channel;
     }
@@ -255,16 +267,15 @@ public ReadableByteChannel getReadChannel() throws IOException {
         return (ReadableByteChannel) channel;
     }
-    
-    public DvObject getDvObject()
-    {
+
+    public DvObject getDvObject() {
         return dvObject;
     }
-    
+
     public DataFile getDataFile() {
         return (DataFile) dvObject;
     }
-    
+
     public Dataset getDataset() {
         return (Dataset) dvObject;
     }
@@ -277,9 +288,9 @@ public DataAccessRequest getRequest() {
         return req;
     }
 
-    /*public int getStatus() {
-        return status;
-    }*/
+    /*
+     * public int getStatus() { return status; }
+     */
 
     public long getSize() {
         return size;
@@ -292,9 +303,9 @@ public long getOffset() {
     public InputStream getInputStream() throws IOException {
         return in;
     }
-    
+
     public OutputStream getOutputStream() throws IOException {
-        return out; 
+        return out;
     }
 
     public String getMimeType() {
@@ -317,23 +328,23 @@ public String getRemoteUrl() {
         return remoteUrl;
     }
 
-    public String getTemporarySwiftUrl(){
+    public String getTemporarySwiftUrl() {
         return temporarySwiftUrl;
     }
-    
+
     public String getTempUrlExpiry() {
         return tempUrlExpiry;
     }
-    
+
     public String getTempUrlSignature() {
         return tempUrlSignature;
     }
-    
+
     public String getSwiftFileName() {
         return swiftFileName;
     }
 
-    public String getSwiftContainerName(){
+    public String getSwiftContainerName() {
         return swiftContainerName;
     }
 
@@ -344,34 +355,32 @@ public String getRemoteStoreName() {
public URL getRemoteStoreUrl() { return remoteStoreUrl; } - - /*public GetMethod getHTTPMethod() { - return method; - } - public Header[] getResponseHeaders() { - return responseHeaders; - }*/ + /* + * public GetMethod getHTTPMethod() { return method; } + * + * public Header[] getResponseHeaders() { return responseHeaders; } + */ public boolean isLocalFile() { return isLocalFile; } - - // "Direct Access" StorageIO is used to access a physical storage - // location not associated with any dvObject. (For example, when we - // are deleting a physical file left behind by a DataFile that's - // already been deleted from the database). + + // "Direct Access" StorageIO is used to access a physical storage + // location not associated with any dvObject. (For example, when we + // are deleting a physical file left behind by a DataFile that's + // already been deleted from the database). public boolean isDirectAccess() { - return dvObject == null; + return dvObject == null; } - /*public boolean isRemoteAccess() { - return isRemoteAccess; - }*/ + /* + * public boolean isRemoteAccess() { return isRemoteAccess; } + */ - /*public boolean isHttpAccess() { - return isHttpAccess; - }*/ + /* + * public boolean isHttpAccess() { return isHttpAccess; } + */ public boolean isDownloadSupported() { return isDownloadSupported; @@ -398,9 +407,9 @@ public void setRequest(DataAccessRequest dar) { req = dar; } - /*public void setStatus(int s) { - status = s; - }*/ + /* + * public void setStatus(int s) { status = s; } + */ public void setSize(long s) { size = s; @@ -421,11 +430,11 @@ public void setOffset(long offset) throws IOException { public void setInputStream(InputStream is) { in = is; } - + public void setOutputStream(OutputStream os) { - out = os; - } - + out = os; + } + public void setChannel(Channel c) { channel = c; } @@ -450,45 +459,46 @@ public void setRemoteUrl(String u) { remoteUrl = u; } - public void setTemporarySwiftUrl(String u){ + public void setTemporarySwiftUrl(String u) { temporarySwiftUrl = u; } - - public void setTempUrlExpiry(Long u){ + + public void setTempUrlExpiry(Long u) { tempUrlExpiry = String.valueOf(u); } - + public void setSwiftFileName(String u) { swiftFileName = u; } - - public void setTempUrlSignature(String u){ + + public void setTempUrlSignature(String u) { tempUrlSignature = u; } - public void setSwiftContainerName(String u){ + public void setSwiftContainerName(String u) { swiftContainerName = u; } - /*public void setHTTPMethod(GetMethod hm) { - method = hm; - }*/ + /* + * public void setHTTPMethod(GetMethod hm) { method = hm; } + */ - /*public void setResponseHeaders(Header[] headers) { - responseHeaders = headers; - }*/ + /* + * public void setResponseHeaders(Header[] headers) { responseHeaders = headers; + * } + */ public void setIsLocalFile(boolean f) { isLocalFile = f; } - /*public void setIsRemoteAccess(boolean r) { - isRemoteAccess = r; - }*/ + /* + * public void setIsRemoteAccess(boolean r) { isRemoteAccess = r; } + */ - /*public void setIsHttpAccess(boolean h) { - isHttpAccess = h; - }*/ + /* + * public void setIsHttpAccess(boolean h) { isHttpAccess = h; } + */ public void setIsDownloadSupported(boolean d) { isDownloadSupported = d; @@ -506,12 +516,11 @@ public void setNoVarHeader(boolean nvh) { noVarHeader = nvh; } - // connection management methods: - /*public void releaseConnection() { - if (method != null) { - method.releaseConnection(); - } - }*/ + // connection management methods: + /* + * public void releaseConnection() { if (method != null) { + * 
method.releaseConnection(); } } + */ public void closeInputStream() { if (in != null) { @@ -528,7 +537,7 @@ public void closeInputStream() { } } } - + public String generateVariableHeader(List dvs) { String varHeader = null; @@ -571,14 +580,14 @@ protected boolean isWriteAccessRequested(DataAccessOption... options) throws IOE return false; } - public boolean isBelowIngestSizeLimit() { - long limit = Long.parseLong(System.getProperty("dataverse.files." + this.driverId + ".ingestsizelimit", "-1")); - if(limit>0 && getSize()>limit) { - return false; - } else { - return true; - } - } + public boolean isBelowIngestSizeLimit() { + long limit = Long.parseLong(getConfigParam(INGEST_SIZE_LIMIT, "-1")); + if (limit > 0 && getSize() > limit) { + return false; + } else { + return true; + } + } public boolean downloadRedirectEnabled() { return false; @@ -587,36 +596,43 @@ public boolean downloadRedirectEnabled() { public boolean downloadRedirectEnabled(String auxObjectTag) { return false; } - - public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) throws IOException { + + public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliaryType, String auxiliaryFileName) + throws IOException { throw new UnsupportedDataAccessOperationException("Direct download not implemented for this storage type"); } - public static boolean isPublicStore(String driverId) { - //Read once and cache - if(!driverPublicAccessMap.containsKey(driverId)) { - driverPublicAccessMap.put(driverId, Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".public"))); + // Read once and cache + if (!driverPublicAccessMap.containsKey(driverId)) { + driverPublicAccessMap.put(driverId, + Boolean.parseBoolean(getConfigParamForDriver(driverId, PUBLIC))); } return driverPublicAccessMap.get(driverId); } - + public static String getDriverPrefix(String driverId) { - return driverId+ DataAccess.SEPARATOR; + return driverId + DataAccess.SEPARATOR; } - + public static boolean isDirectUploadEnabled(String driverId) { - return (System.getProperty("dataverse.files." + driverId + ".type").equals(DataAccess.S3) && Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-redirect"))) || - Boolean.parseBoolean(System.getProperty("dataverse.files." + driverId + ".upload-out-of-band")); + return (getConfigParamForDriver(driverId, TYPE).equals(DataAccess.S3) + && Boolean.parseBoolean(getConfigParamForDriver(driverId, UPLOAD_REDIRECT))) + || Boolean.parseBoolean(getConfigParamForDriver(driverId, UPLOAD_OUT_OF_BAND)); + } + + //True by default, Stores (e.g. RemoteOverlay, Globus) can set this false to stop attempts to read bytes + public static boolean isDataverseAccessible(String driverId) { + return (true && !Boolean.parseBoolean(StorageIO.getConfigParamForDriver(driverId, AbstractRemoteOverlayAccessIO.FILES_NOT_ACCESSIBLE_BY_DATAVERSE))); } - //Check that storageIdentifier is consistent with store's config - //False will prevent direct uploads - protected static boolean isValidIdentifier(String driverId, String storageId) { + // Check that storageIdentifier is consistent with store's config + // False will prevent direct uploads + static boolean isValidIdentifier(String driverId, String storageId) { return false; } - - //Utility to verify the standard UUID pattern for stored files. + + // Utility to verify the standard UUID pattern for stored files. 
     protected static boolean usesStandardNamePattern(String identifier) {
 
         Pattern r = Pattern.compile("^[a-f,0-9]{11}-[a-f,0-9]{12}$");
@@ -626,4 +642,44 @@ protected static boolean usesStandardNamePattern(String identifier) {
 
     public abstract List<String> cleanUp(Predicate<String> filter, boolean dryRun) throws IOException;
 
+    /**
+     * A storage-type-specific mechanism for retrieving the size of a file. Intended
+     * primarily as a way to get the size before it has been recorded in the
+     * database, e.g. during direct/out-of-band transfers but could be useful to
+     * check the db values.
+     *
+     * @return file size in bytes
+     * @throws IOException
+     */
+    public abstract long retrieveSizeFromMedia() throws IOException;
+
+
+    /* Convenience methods to get a driver-specific parameter
+     *
+     * - with or without a default
+     * - static or per object
+     *
+     * @param parameterName
+     * @return the parameter value
+     */
+
+    protected String getConfigParam(String parameterName) {
+        return getConfigParam(parameterName, null);
+    }
+
+    protected String getConfigParam(String parameterName, String defaultValue) {
+        return getConfigParamForDriver(this.driverId, parameterName, defaultValue);
+    }
+
+    protected static String getConfigParamForDriver(String driverId, String parameterName) {
+        return getConfigParamForDriver(driverId, parameterName, null);
+    }
+    protected static String getConfigParamForDriver(String driverId, String parameterName, String defaultValue) {
+        return System.getProperty("dataverse.files." + driverId + "." + parameterName, defaultValue);
+    }
+
+    public static String getNewIdentifier(String driverId) {
+        return driverId + DataAccess.SEPARATOR + FileUtil.generateStorageIdentifier();
+    }
+
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java
index 6c84009de3e..717f46ffd60 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/SwiftAccessIO.java
@@ -50,6 +50,17 @@ public class SwiftAccessIO<T extends DvObject> extends StorageIO<T> {
     private String swiftLocation;
 
     private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.dataaccess.SwiftAccessIO");
+    private static final String IS_PUBLIC_CONTAINER = "isPublicContainer";
+    private static final String FOLDER_PATH_SEPARATOR = "folderPathSeparator";
+    private static final String DEFAULT_ENDPOINT = "defaultEndpoint";
+    private static final String TEMPORARY_URL_EXPIRY_TIME = "temporaryUrlExpiryTime";
+    private static final String AUTH_URL = "authUrl";
+    private static final String USERNAME = "username";
+    private static final String PASSWORD = "password";
+    private static final String TENANT = "tenant";
+    private static final String AUTH_TYPE = "authType";
+    private static final String HASH_KEY = "hashKey";
+    private static final String ENDPOINT = "endpoint";
 
     public SwiftAccessIO() {
         //Partially functional StorageIO object - constructor only for testing
@@ -70,10 +81,10 @@ public SwiftAccessIO(String swiftLocation, String driverId) {
     }
 
     private void readSettings() {
-        isPublicContainer = Boolean.parseBoolean(System.getProperty("dataverse.files." + this.driverId + ".isPublicContainer", "true"));
-        swiftFolderPathSeparator = System.getProperty("dataverse.files." + this.driverId + ".folderPathSeparator", "_");
-        swiftDefaultEndpoint = System.getProperty("dataverse.files." + this.driverId + ".defaultEndpoint");
-        tempUrlExpires = Integer.parseInt(System.getProperty("dataverse.files."
+ this.driverId + ".temporaryUrlExpiryTime", "60")); + isPublicContainer = Boolean.parseBoolean(getConfigParam(IS_PUBLIC_CONTAINER, "true")); + swiftFolderPathSeparator = getConfigParam(FOLDER_PATH_SEPARATOR, "_"); + swiftDefaultEndpoint = getConfigParam(DEFAULT_ENDPOINT); + tempUrlExpires = Integer.parseInt(getConfigParam(TEMPORARY_URL_EXPIRY_TIME, "60")); } @@ -131,7 +142,8 @@ public void open(DataAccessOption... options) throws IOException { && dataFile.getContentType().equals("text/tab-separated-values") && dataFile.isTabularData() && dataFile.getDataTable() != null - && (!this.noVarHeader())) { + && (!this.noVarHeader()) + && (!dataFile.getDataTable().isStoredWithVariableHeader())) { List datavariables = dataFile.getDataTable().getDataVariables(); String varHeaderLine = generateVariableHeader(datavariables); @@ -740,12 +752,12 @@ private StoredObject openSwiftAuxFile(boolean writeAccess, String auxItemTag) th } Account authenticateWithSwift(String swiftEndPoint) throws IOException { - String swiftEndPointAuthUrl = System.getProperty("dataverse.files." + this.driverId + ".authUrl." + swiftEndPoint); - String swiftEndPointUsername = System.getProperty("dataverse.files." + this.driverId + ".username." + swiftEndPoint); - String swiftEndPointSecretKey = System.getProperty("dataverse.files." + this.driverId + ".password." + swiftEndPoint); - String swiftEndPointTenantName = System.getProperty("dataverse.files." + this.driverId + ".tenant." + swiftEndPoint); - String swiftEndPointAuthMethod = System.getProperty("dataverse.files." + this.driverId + ".authType." + swiftEndPoint); - String swiftEndPointTenantId = System.getProperty("dataverse.files." + this.driverId + ".tenant." + swiftEndPoint); + String swiftEndPointAuthUrl = getConfigParam(AUTH_URL + "." + swiftEndPoint); + String swiftEndPointUsername = getConfigParam(USERNAME + "." + swiftEndPoint); + String swiftEndPointSecretKey = getConfigParam(PASSWORD + "." + swiftEndPoint); + String swiftEndPointTenantName = getConfigParam(TENANT + "." + swiftEndPoint); + String swiftEndPointAuthMethod = getConfigParam(AUTH_TYPE + "." + swiftEndPoint); + String swiftEndPointTenantId = getConfigParam(TENANT + "." + swiftEndPoint); if (swiftEndPointAuthUrl == null || swiftEndPointUsername == null || swiftEndPointSecretKey == null || "".equals(swiftEndPointAuthUrl) || "".equals(swiftEndPointUsername) || "".equals(swiftEndPointSecretKey)) { @@ -814,7 +826,7 @@ private String getSwiftFileURI(StoredObject fileObject) throws IOException { private String hmac = null; public String generateTempUrlSignature(String swiftEndPoint, String containerName, String objectName, int duration) throws IOException { if (hmac == null || isExpiryExpired(generateTempUrlExpiry(duration, System.currentTimeMillis()), duration, System.currentTimeMillis())) { - String secretKey = System.getProperty("dataverse.files." + this.driverId + ".hashKey." + swiftEndPoint); + String secretKey = getConfigParam(HASH_KEY + "." + swiftEndPoint); if (secretKey == null) { throw new IOException("Please input a hash key under dataverse.files." + this.driverId + ".hashKey." + swiftEndPoint); } @@ -841,7 +853,7 @@ public long generateTempUrlExpiry(int duration, long currentTime) { private String temporaryUrl = null; private String generateTemporarySwiftUrl(String swiftEndPoint, String containerName, String objectName, int duration) throws IOException { - String baseUrl = System.getProperty("dataverse.files." + this.driverId + ".endpoint." 
+ swiftEndPoint); + String baseUrl = getConfigParam(ENDPOINT + "." + swiftEndPoint); String path = "/v1/" + containerName + "/" + objectName; if (temporaryUrl == null || isExpiryExpired(generateTempUrlExpiry(duration, System.currentTimeMillis()), duration, System.currentTimeMillis())) { @@ -954,4 +966,9 @@ public List cleanUp(Predicate filter, boolean dryRun) throws IOE } return toDelete; } + + @Override + public long retrieveSizeFromMedia() throws IOException { + throw new UnsupportedDataAccessOperationException("InputStreamIO: this method is not supported in this DataAccess driver."); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java index 782f7f3a52d..c369010c8cd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java @@ -60,305 +60,26 @@ public class TabularSubsetGenerator implements SubsetGenerator { - private static Logger dbgLog = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName()); + private static Logger logger = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName()); - private static int COLUMN_TYPE_STRING = 1; - private static int COLUMN_TYPE_LONG = 2; - private static int COLUMN_TYPE_DOUBLE = 3; - private static int COLUMN_TYPE_FLOAT = 4; - - private static int MAX_COLUMN_BUFFER = 8192; - - private FileChannel fileChannel = null; - - private int varcount; - private int casecount; - private int subsetcount; - - private byte[][] columnEntries = null; - - - private ByteBuffer[] columnByteBuffers; - private int[] columnBufferSizes; - private int[] columnBufferOffsets; - - private long[] columnStartOffsets; - private long[] columnTotalOffsets; - private long[] columnTotalLengths; - - public TabularSubsetGenerator() { - - } - - public TabularSubsetGenerator (DataFile datafile, List variables) throws IOException { - if (!datafile.isTabularData()) { - throw new IOException("DataFile is not tabular data."); - } - - setVarCount(datafile.getDataTable().getVarQuantity().intValue()); - setCaseCount(datafile.getDataTable().getCaseQuantity().intValue()); - - - - StorageIO dataAccess = datafile.getStorageIO(); - if (!dataAccess.isLocalFile()) { - throw new IOException("Subsetting is supported on local files only!"); - } - - //File tabfile = datafile.getFileSystemLocation().toFile(); - File tabfile = dataAccess.getFileSystemPath().toFile(); + //private static int MAX_COLUMN_BUFFER = 8192; - File rotatedImageFile = getRotatedImage(tabfile, getVarCount(), getCaseCount()); - long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, getVarCount(), getCaseCount()); - - fileChannel = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), StandardOpenOption.READ)); - - if (variables == null || variables.size() < 1 || variables.size() > getVarCount()) { - throw new IOException("Illegal number of variables in the subset request"); - } - - subsetcount = variables.size(); - columnTotalOffsets = new long[subsetcount]; - columnTotalLengths = new long[subsetcount]; - columnByteBuffers = new ByteBuffer[subsetcount]; - - + public TabularSubsetGenerator() { - if (subsetcount == 1) { - if (!datafile.getDataTable().getId().equals(variables.get(0).getDataTable().getId())) { - throw new IOException("Variable in the subset request does not belong to the datafile."); - } - dbgLog.fine("single variable subset; setting 
fileChannel position to "+extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder())); - fileChannel.position(extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder())); - columnTotalLengths[0] = extractColumnLength(columnEndOffsets, variables.get(0).getFileOrder()); - columnTotalOffsets[0] = 0; - } else { - columnEntries = new byte[subsetcount][]; - - columnBufferSizes = new int[subsetcount]; - columnBufferOffsets = new int[subsetcount]; - columnStartOffsets = new long[subsetcount]; - - int i = 0; - for (DataVariable var : variables) { - if (!datafile.getDataTable().getId().equals(var.getDataTable().getId())) { - throw new IOException("Variable in the subset request does not belong to the datafile."); - } - columnByteBuffers[i] = ByteBuffer.allocate(MAX_COLUMN_BUFFER); - columnTotalLengths[i] = extractColumnLength(columnEndOffsets, var.getFileOrder()); - columnStartOffsets[i] = extractColumnOffset(columnEndOffsets, var.getFileOrder()); - if (columnTotalLengths[i] < MAX_COLUMN_BUFFER) { - columnByteBuffers[i].limit((int)columnTotalLengths[i]); - } - fileChannel.position(columnStartOffsets[i]); - columnBufferSizes[i] = fileChannel.read(columnByteBuffers[i]); - columnBufferOffsets[i] = 0; - columnTotalOffsets[i] = columnBufferSizes[i]; - i++; - } - } - } - - private int getVarCount() { - return varcount; } - private void setVarCount(int varcount) { - this.varcount = varcount; - } - - private int getCaseCount() { - return casecount; - } - - private void setCaseCount(int casecount) { - this.casecount = casecount; - } - - - /* - * Note that this method operates on the *absolute* column number, i.e. - * the number of the physical column in the tabular file. This is stored - * in DataVariable.FileOrder. - * This "column number" should not be confused with the number of column - * in the subset request; a user can request any number of variable - * columns, in an order that doesn't have to follow the physical order - * of the columns in the file. - */ - private long extractColumnOffset(long[] columnEndOffsets, int column) throws IOException { - if (columnEndOffsets == null || columnEndOffsets.length <= column) { - throw new IOException("Offsets table not initialized; or column out of bounds."); - } - long columnOffset; - - if (column > 0) { - columnOffset = columnEndOffsets[column - 1]; - } else { - columnOffset = getVarCount() * 8; - } - return columnOffset; - } - - /* - * See the comment for the method above. + /** + * This class used to be much more complex. There were methods for subsetting + * from fixed-width field files; including using the optimized, "90 deg. rotated" + * versions of such files (i.e. you create a *columns-wise* copy of your data + * file in which the columns are stored sequentially, and a table of byte + * offsets of each column. You can then read individual variable columns + * for cheap; at the expense of doubling the storage size of your tabular + * data files. These methods were not used, so they were deleted (in Jan. 2024 + * prior to 6.2. + * Please consult git history if you are interested in looking at that code. 
*/ - private long extractColumnLength(long[] columnEndOffsets, int column) throws IOException { - if (columnEndOffsets == null || columnEndOffsets.length <= column) { - throw new IOException("Offsets table not initialized; or column out of bounds."); - } - long columnLength; - - if (column > 0) { - columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1]; - } else { - columnLength = columnEndOffsets[0] - varcount * 8; - } - - return columnLength; - } - - - private void bufferMoreColumnBytes(int column) throws IOException { - if (columnTotalOffsets[column] >= columnTotalLengths[column]) { - throw new IOException("attempt to buffer bytes past the column boundary"); - } - fileChannel.position(columnStartOffsets[column] + columnTotalOffsets[column]); - - columnByteBuffers[column].clear(); - if (columnTotalLengths[column] < columnTotalOffsets[column] + MAX_COLUMN_BUFFER) { - dbgLog.fine("Limiting the buffer to "+(columnTotalLengths[column] - columnTotalOffsets[column])+" bytes"); - columnByteBuffers[column].limit((int) (columnTotalLengths[column] - columnTotalOffsets[column])); - } - columnBufferSizes[column] = fileChannel.read(columnByteBuffers[column]); - dbgLog.fine("Read "+columnBufferSizes[column]+" bytes for subset column "+column); - columnBufferOffsets[column] = 0; - columnTotalOffsets[column] += columnBufferSizes[column]; - } - - public byte[] readColumnEntryBytes(int column) { - return readColumnEntryBytes(column, true); - } - - - public byte[] readColumnEntryBytes(int column, boolean addTabs) { - byte[] leftover = null; - byte[] ret = null; - - if (columnBufferOffsets[column] >= columnBufferSizes[column]) { - try { - bufferMoreColumnBytes(column); - if (columnBufferSizes[column] < 1) { - return null; - } - } catch (IOException ioe) { - return null; - } - } - - int byteindex = columnBufferOffsets[column]; - try { - while (columnByteBuffers[column].array()[byteindex] != '\n') { - byteindex++; - if (byteindex == columnBufferSizes[column]) { - // save the leftover: - if (leftover == null) { - leftover = new byte[columnBufferSizes[column] - columnBufferOffsets[column]]; - System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], leftover, 0, columnBufferSizes[column] - columnBufferOffsets[column]); - } else { - byte[] merged = new byte[leftover.length + columnBufferSizes[column]]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnByteBuffers[column].array(), 0, merged, leftover.length, columnBufferSizes[column]); - leftover = merged; - merged = null; - } - // read more bytes: - bufferMoreColumnBytes(column); - if (columnBufferSizes[column] < 1) { - return null; - } - byteindex = 0; - } - } - - // presumably, we have found our '\n': - if (leftover == null) { - ret = new byte[byteindex - columnBufferOffsets[column] + 1]; - System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], ret, 0, byteindex - columnBufferOffsets[column] + 1); - } else { - ret = new byte[leftover.length + byteindex + 1]; - System.arraycopy(leftover, 0, ret, 0, leftover.length); - System.arraycopy(columnByteBuffers[column].array(), 0, ret, leftover.length, byteindex + 1); - } - - } catch (IOException ioe) { - return null; - } - - columnBufferOffsets[column] = (byteindex + 1); - - if (column < columnBufferOffsets.length - 1) { - ret[ret.length - 1] = '\t'; - } - return ret; - } - - public int readSingleColumnSubset(byte[] buffer) throws IOException { - if (columnTotalOffsets[0] == columnTotalLengths[0]) { - return -1; - } - 
- if (columnByteBuffers[0] == null) { - dbgLog.fine("allocating single column subset buffer."); - columnByteBuffers[0] = ByteBuffer.allocate(buffer.length); - } - - int bytesread = fileChannel.read(columnByteBuffers[0]); - dbgLog.fine("single column subset: read "+bytesread+" bytes."); - if (columnTotalOffsets[0] + bytesread > columnTotalLengths[0]) { - bytesread = (int)(columnTotalLengths[0] - columnTotalOffsets[0]); - } - System.arraycopy(columnByteBuffers[0].array(), 0, buffer, 0, bytesread); - - columnTotalOffsets[0] += bytesread; - columnByteBuffers[0].clear(); - return bytesread > 0 ? bytesread : -1; - } - - - public byte[] readSubsetLineBytes() throws IOException { - byte[] ret = null; - int total = 0; - for (int i = 0; i < subsetcount; i++) { - columnEntries[i] = readColumnEntryBytes(i); - if (columnEntries[i] == null) { - throw new IOException("Failed to read subset line entry"); - } - total += columnEntries[i].length; - } - - ret = new byte[total]; - int offset = 0; - for (int i = 0; i < subsetcount; i++) { - System.arraycopy(columnEntries[i], 0, ret, offset, columnEntries[i].length); - offset += columnEntries[i].length; - } - dbgLog.fine("line: "+new String(ret)); - return ret; - } - - - public void close() { - if (fileChannel != null) { - try { - fileChannel.close(); - } catch (IOException ioe) { - // don't care. - } - } - } - public void subsetFile(String infile, String outfile, List columns, Long numCases) { subsetFile(infile, outfile, columns, numCases, "\t"); } @@ -411,11 +132,15 @@ public void subsetFile(InputStream in, String outfile, List columns, Lo * files, OK to use on small files: */ - public static Double[] subsetDoubleVector(InputStream in, int column, int numCases) { + public static Double[] subsetDoubleVector(InputStream in, int column, int numCases, boolean skipHeader) { Double[] retVector = new Double[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -463,11 +188,15 @@ public static Double[] subsetDoubleVector(InputStream in, int column, int numCas * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. */ - public static Float[] subsetFloatVector(InputStream in, int column, int numCases) { + public static Float[] subsetFloatVector(InputStream in, int column, int numCases, boolean skipHeader) { Float[] retVector = new Float[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -513,11 +242,15 @@ public static Float[] subsetFloatVector(InputStream in, int column, int numCases * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. 
*/ - public static Long[] subsetLongVector(InputStream in, int column, int numCases) { + public static Long[] subsetLongVector(InputStream in, int column, int numCases, boolean skipHeader) { Long[] retVector = new Long[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -549,11 +282,15 @@ public static Long[] subsetLongVector(InputStream in, int column, int numCases) * Same deal as with the method above - straightforward, but (potentially) slow. * Not a resource hog though - will only try to store one vector in memory. */ - public static String[] subsetStringVector(InputStream in, int column, int numCases) { + public static String[] subsetStringVector(InputStream in, int column, int numCases, boolean skipHeader) { String[] retVector = new String[numCases]; try (Scanner scanner = new Scanner(in)) { scanner.useDelimiter("\\n"); + if (skipHeader) { + skipFirstLine(scanner); + } + for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { if (scanner.hasNext()) { String[] line = (scanner.next()).split("\t", -1); @@ -621,819 +358,10 @@ public static String[] subsetStringVector(InputStream in, int column, int numCas } - /* - * Straightforward method for subsetting a tab-delimited data file, extracting - * all the columns representing continuous variables and returning them as - * a 2-dimensional array of Doubles; - * Inefficient on large files, OK to use on small ones. - */ - public static Double[][] subsetDoubleVectors(InputStream in, Set columns, int numCases) throws IOException { - Double[][] retVector = new Double[columns.size()][numCases]; - try (Scanner scanner = new Scanner(in)) { - scanner.useDelimiter("\\n"); - - for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - int j = 0; - for (Integer i : columns) { - try { - // TODO: verify that NaN and +-Inf are going to be - // handled correctly here! -- L.A. - // NO, "+-Inf" is not handled correctly; see the - // comment further down below. - retVector[j][caseIndex] = new Double(line[i]); - } catch (NumberFormatException ex) { - retVector[j][caseIndex] = null; // missing value - } - j++; - } - } else { - throw new IOException("Tab file has fewer rows than the stored number of cases!"); - } - } - - int tailIndex = numCases; - while (scanner.hasNext()) { - String nextLine = scanner.next(); - if (!"".equals(nextLine)) { - throw new IOException("Tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); - } - tailIndex++; - } - - } - return retVector; - - } - - public String[] subsetStringVector(DataFile datafile, int column) throws IOException { - return (String[])subsetObjectVector(datafile, column, COLUMN_TYPE_STRING); - } - - public Double[] subsetDoubleVector(DataFile datafile, int column) throws IOException { - return (Double[])subsetObjectVector(datafile, column, COLUMN_TYPE_DOUBLE); - } - - public Long[] subsetLongVector(DataFile datafile, int column) throws IOException { - return (Long[])subsetObjectVector(datafile, column, COLUMN_TYPE_LONG); - } - - // Float methods are temporary; - // In normal operations we'll be treating all the floating point types as - // doubles. I need to be able to handle floats for some 4.0 vs 3.* ingest - // tests. -- L.A. 
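As a usage sketch (not part of the patch): callers of the four static subset*Vector methods above now pass the new skipHeader flag when the tab-delimited stream carries a variable-name header line. The file name, column index, and case count below are hypothetical; only the subsetDoubleVector(InputStream, int, int, boolean) signature comes from the change itself.

import edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator;

import java.io.FileInputStream;
import java.io.InputStream;

public class SkipHeaderExample {
    public static void main(String[] args) throws Exception {
        // "data.tab" is a hypothetical tab-delimited file whose first line is a
        // variable-name header; skipHeader=true discards that line before the
        // subsetter reads the 100 data cases of column 2. Unparseable cells come
        // back as null (missing values), matching the behavior of the code above.
        try (InputStream in = new FileInputStream("data.tab")) {
            Double[] column = TabularSubsetGenerator.subsetDoubleVector(in, 2, 100, true);
            for (Double value : column) {
                System.out.println(value == null ? "NA" : value.toString());
            }
        }
    }
}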
- - public Float[] subsetFloatVector(DataFile datafile, int column) throws IOException { - return (Float[])subsetObjectVector(datafile, column, COLUMN_TYPE_FLOAT); - } - - public String[] subsetStringVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (String[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_STRING); - } - - public Double[] subsetDoubleVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Double[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_DOUBLE); - } - - public Long[] subsetLongVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Long[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_LONG); - } - - public Float[] subsetFloatVector(File tabfile, int column, int varcount, int casecount) throws IOException { - return (Float[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_FLOAT); - } - - public Object[] subsetObjectVector(DataFile dataFile, int column, int columntype) throws IOException { - if (!dataFile.isTabularData()) { - throw new IOException("DataFile is not tabular data."); - } - - int varcount = dataFile.getDataTable().getVarQuantity().intValue(); - int casecount = dataFile.getDataTable().getCaseQuantity().intValue(); - - if (column >= varcount) { - throw new IOException("Column "+column+" is out of bounds."); - } - - StorageIO dataAccess = dataFile.getStorageIO(); - if (!dataAccess.isLocalFile()) { - throw new IOException("Subsetting is supported on local files only!"); - } - - //File tabfile = datafile.getFileSystemLocation().toFile(); - File tabfile = dataAccess.getFileSystemPath().toFile(); - - if (columntype == COLUMN_TYPE_STRING) { - String filename = dataFile.getFileMetadata().getLabel(); - if (filename != null) { - filename = filename.replaceFirst("^_", ""); - Integer fnumvalue = null; - try { - fnumvalue = new Integer(filename); - } catch (Exception ex){ - fnumvalue = null; - } - if (fnumvalue != null) { - //if ((fnumvalue.intValue() < 112497)) { // && (fnumvalue.intValue() > 60015)) { - if ((fnumvalue.intValue() < 111931)) { // && (fnumvalue.intValue() > 60015)) { - if (!(fnumvalue.intValue() == 60007 - || fnumvalue.intValue() == 59997 - || fnumvalue.intValue() == 60015 - || fnumvalue.intValue() == 59948 - || fnumvalue.intValue() == 60012 - || fnumvalue.intValue() == 52585 - || fnumvalue.intValue() == 60005 - || fnumvalue.intValue() == 60002 - || fnumvalue.intValue() == 59954 - || fnumvalue.intValue() == 60008 - || fnumvalue.intValue() == 54972 - || fnumvalue.intValue() == 55010 - || fnumvalue.intValue() == 54996 - || fnumvalue.intValue() == 53527 - || fnumvalue.intValue() == 53546 - || fnumvalue.intValue() == 55002 - || fnumvalue.intValue() == 55006 - || fnumvalue.intValue() == 54998 - || fnumvalue.intValue() == 52552 - // SPSS/SAV cases with similar issue - compat mode must be disabled - //|| fnumvalue.intValue() == 101826 // temporary - tricky file with accents and v. 16... - || fnumvalue.intValue() == 54618 // another SAV file, with long strings... - || fnumvalue.intValue() == 54619 // [same] - || fnumvalue.intValue() == 57983 - || fnumvalue.intValue() == 58262 - || fnumvalue.intValue() == 58288 - || fnumvalue.intValue() == 58656 - || fnumvalue.intValue() == 59144 - // || fnumvalue.intValue() == 69626 [nope!] 
- )) { - dbgLog.info("\"Old\" file name detected; using \"compatibility mode\" for a character vector subset;"); - return subsetObjectVector(tabfile, column, varcount, casecount, columntype, true); - } - } - } - } + private static void skipFirstLine(Scanner scanner) { + if (!scanner.hasNext()) { + throw new RuntimeException("Failed to read the variable name header line from the tab-delimited file!"); } - - return subsetObjectVector(tabfile, column, varcount, casecount, columntype); - } - - public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype) throws IOException { - return subsetObjectVector(tabfile, column, varcount, casecount, columntype, false); - } - - - - public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype, boolean compatmode) throws IOException { - - Object[] retVector = null; - - boolean isString = false; - boolean isDouble = false; - boolean isLong = false; - boolean isFloat = false; - - //Locale loc = new Locale("en", "US"); - - if (columntype == COLUMN_TYPE_STRING) { - isString = true; - retVector = new String[casecount]; - } else if (columntype == COLUMN_TYPE_DOUBLE) { - isDouble = true; - retVector = new Double[casecount]; - } else if (columntype == COLUMN_TYPE_LONG) { - isLong = true; - retVector = new Long[casecount]; - } else if (columntype == COLUMN_TYPE_FLOAT){ - isFloat = true; - retVector = new Float[casecount]; - } else { - throw new IOException("Unsupported column type: "+columntype); - } - - File rotatedImageFile = getRotatedImage(tabfile, varcount, casecount); - long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, varcount, casecount); - long columnOffset = 0; - long columnLength = 0; - - if (column > 0) { - columnOffset = columnEndOffsets[column - 1]; - columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1]; - } else { - columnOffset = varcount * 8; - columnLength = columnEndOffsets[0] - varcount * 8; - } - int caseindex = 0; - - try (FileChannel fc = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), - StandardOpenOption.READ))) { - fc.position(columnOffset); - int MAX_COLUMN_BUFFER = 8192; - - ByteBuffer in = ByteBuffer.allocate(MAX_COLUMN_BUFFER); - - if (columnLength < MAX_COLUMN_BUFFER) { - in.limit((int) (columnLength)); - } - - long bytesRead = 0; - long bytesReadTotal = 0; - - int byteoffset = 0; - byte[] leftover = null; - - while (bytesReadTotal < columnLength) { - bytesRead = fc.read(in); - byte[] columnBytes = in.array(); - int bytecount = 0; - - while (bytecount < bytesRead) { - if (columnBytes[bytecount] == '\n') { - /* - String token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8"); - - if (leftover != null) { - String leftoverString = new String (leftover, "UTF8"); - token = leftoverString + token; - leftover = null; - } - */ - /* - * Note that the way I was doing it at first - above - - * was not quite the correct way - because I was creating UTF8 - * strings from the leftover bytes, and the bytes in the - * current buffer *separately*; which means, if a multi-byte - * UTF8 character got split in the middle between one buffer - * and the next, both chunks of it would become junk - * characters, on each side! - * The correct way of doing it, of course, is to create a - * merged byte buffer, and then turn it into a UTF8 string. - * -- L.A. 
4.0 - */ - String token = null; - - if (leftover == null) { - token = new String(columnBytes, byteoffset, bytecount - byteoffset, "UTF8"); - } else { - byte[] merged = new byte[leftover.length + bytecount - byteoffset]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, bytecount - byteoffset); - token = new String(merged, "UTF8"); - leftover = null; - merged = null; - } - - if (isString) { - if ("".equals(token)) { - // An empty string is a string missing value! - // An empty string in quotes is an empty string! - retVector[caseindex] = null; - } else { - // Strip the outer quotes: - token = token.replaceFirst("^\\\"", ""); - token = token.replaceFirst("\\\"$", ""); - - // We need to restore the special characters that - // are stored in tab files escaped - quotes, new lines - // and tabs. Before we do that however, we need to - // take care of any escaped backslashes stored in - // the tab file. I.e., "foo\t" should be transformed - // to "foo"; but "foo\\t" should be transformed - // to "foo\t". This way new lines and tabs that were - // already escaped in the original data are not - // going to be transformed to unescaped tab and - // new line characters! - - String[] splitTokens = token.split(Matcher.quoteReplacement("\\\\"), -2); - - // (note that it's important to use the 2-argument version - // of String.split(), and set the limit argument to a - // negative value; otherwise any trailing backslashes - // are lost.) - - for (int i = 0; i < splitTokens.length; i++) { - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); - splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); - } - // TODO: - // Make (some of?) the above optional; for ex., we - // do need to restore the newlines when calculating UNFs; - // But if we are subsetting these vectors in order to - // create a new tab-delimited file, they will - // actually break things! -- L.A. Jul. 28 2014 - - token = StringUtils.join(splitTokens, '\\'); - - // "compatibility mode" - a hack, to be able to produce - // unfs identical to those produced by the "early" - // unf5 jar; will be removed in production 4.0. - // -- L.A. (TODO: ...) - if (compatmode && !"".equals(token)) { - if (token.length() > 128) { - if ("".equals(token.trim())) { - // don't ask... - token = token.substring(0, 129); - } else { - token = token.substring(0, 128); - // token = String.format(loc, "%.128s", token); - token = token.trim(); - // dbgLog.info("formatted and trimmed: "+token); - } - } else { - if ("".equals(token.trim())) { - // again, don't ask; - // - this replicates some bugginness - // that happens inside unf5; - token = "null"; - } else { - token = token.trim(); - } - } - } - - retVector[caseindex] = token; - } - } else if (isDouble) { - try { - // TODO: verify that NaN and +-Inf are - // handled correctly here! -- L.A. - // Verified: new Double("nan") works correctly, - // resulting in Double.NaN; - // Double("[+-]Inf") doesn't work however; - // (the constructor appears to be expecting it - // to be spelled as "Infinity", "-Infinity", etc. 
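The long comment above about merging the leftover bytes before decoding makes a point that is easy to demonstrate. The following self-contained snippet (illustrative only, not project code) shows that decoding the two halves of a multi-byte UTF-8 character separately corrupts both, while merging the raw bytes first decodes cleanly:

import java.nio.charset.StandardCharsets;

public class Utf8SplitDemo {
    public static void main(String[] args) {
        byte[] bytes = "é".getBytes(StandardCharsets.UTF_8); // two bytes: 0xC3 0xA9
        // Decoding each half on its own yields two replacement characters...
        String wrong = new String(bytes, 0, 1, StandardCharsets.UTF_8)
                     + new String(bytes, 1, 1, StandardCharsets.UTF_8);
        // ...while decoding the merged byte sequence recovers the character.
        String right = new String(bytes, StandardCharsets.UTF_8);
        System.out.println(wrong); // two U+FFFD replacement characters
        System.out.println(right); // é
    }
}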
- if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Double.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Double(token); - } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for " + token + " as Double"); - - retVector[caseindex] = null; // missing value - // TODO: ? - } - } else if (isLong) { - try { - retVector[caseindex] = new Long(token); - } catch (NumberFormatException ex) { - retVector[caseindex] = null; // assume missing value - } - } else if (isFloat) { - try { - if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.POSITIVE_INFINITY; - } else if ("-inf".equalsIgnoreCase(token)) { - retVector[caseindex] = java.lang.Float.NEGATIVE_INFINITY; - } else if (token == null || token.equals("")) { - // missing value: - retVector[caseindex] = null; - } else { - retVector[caseindex] = new Float(token); - } - } catch (NumberFormatException ex) { - dbgLog.warning("NumberFormatException thrown for " + token + " as Float"); - retVector[caseindex] = null; // assume missing value (TODO: ?) - } - } - caseindex++; - - if (bytecount == bytesRead - 1) { - byteoffset = 0; - } else { - byteoffset = bytecount + 1; - } - } else { - if (bytecount == bytesRead - 1) { - // We've reached the end of the buffer; - // This means we'll save whatever unused bytes left in - // it - i.e., the bytes between the last new line - // encountered and the end - in the leftover buffer. - - // *EXCEPT*, there may be a case of a very long String - // that is actually longer than MAX_COLUMN_BUFFER, in - // which case it is possible that we've read through - // an entire buffer of bytes without finding any - // new lines... in this case we may need to add this - // entire byte buffer to an already existing leftover - // buffer! 
- if (leftover == null) { - leftover = new byte[(int) bytesRead - byteoffset]; - System.arraycopy(columnBytes, byteoffset, leftover, 0, (int) bytesRead - byteoffset); - } else { - if (byteoffset != 0) { - throw new IOException("Reached the end of the byte buffer, with some leftover left from the last read; yet the offset is not zero!"); - } - byte[] merged = new byte[leftover.length + (int) bytesRead]; - - System.arraycopy(leftover, 0, merged, 0, leftover.length); - System.arraycopy(columnBytes, byteoffset, merged, leftover.length, (int) bytesRead); - // leftover = null; - leftover = merged; - merged = null; - } - byteoffset = 0; - - } - } - bytecount++; - } - - bytesReadTotal += bytesRead; - in.clear(); - if (columnLength - bytesReadTotal < MAX_COLUMN_BUFFER) { - in.limit((int) (columnLength - bytesReadTotal)); - } - } - - } - - if (caseindex != casecount) { - throw new IOException("Faile to read "+casecount+" tokens for column "+column); - //System.out.println("read "+caseindex+" tokens instead of expected "+casecount+"."); - } - - return retVector; - } - - private long[] extractColumnOffsets (File rotatedImageFile, int varcount, int casecount) throws IOException { - long[] byteOffsets = new long[varcount]; - - try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotatedImageFile))) { - - byte[] offsetHeader = new byte[varcount * 8]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException("Could not read " + varcount * 8 + " header bytes from the rotated file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex * 8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - // System.out.println(byteOffsets[varindex]); - } - - } - - return byteOffsets; - } - - private File getRotatedImage(File tabfile, int varcount, int casecount) throws IOException { - String fileName = tabfile.getAbsolutePath(); - String rotatedImageFileName = fileName + ".90d"; - File rotatedImageFile = new File(rotatedImageFileName); - if (rotatedImageFile.exists()) { - //System.out.println("Image already exists!"); - return rotatedImageFile; - } - - return generateRotatedImage(tabfile, varcount, casecount); - - } - - private File generateRotatedImage (File tabfile, int varcount, int casecount) throws IOException { - // TODO: throw exceptions if bad file, zero varcount, etc. ... - - String fileName = tabfile.getAbsolutePath(); - String rotatedImageFileName = fileName + ".90d"; - - int MAX_OUTPUT_STREAMS = 32; - int MAX_BUFFERED_BYTES = 10 * 1024 * 1024; // 10 MB - for now? - int MAX_COLUMN_BUFFER = 8 * 1024; - - // offsetHeader will contain the byte offsets of the individual column - // vectors in the final rotated image file - byte[] offsetHeader = new byte[varcount * 8]; - int[] bufferedSizes = new int[varcount]; - long[] cachedfileSizes = new long[varcount]; - File[] columnTempFiles = new File[varcount]; - - for (int i = 0; i < varcount; i++) { - bufferedSizes[i] = 0; - cachedfileSizes[i] = 0; - } - - // TODO: adjust MAX_COLUMN_BUFFER here, so that the total size is - // no more than MAX_BUFFERED_BYTES (but no less than 1024 maybe?) 
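As an aside, the ".90d" header that extractColumnOffsets() reads back can be decoded with plain ByteBuffer arithmetic. This standalone sketch assumes only what the code above documents (a header of varcount big-endian longs, each the end offset of one column vector):

import java.nio.ByteBuffer;

public class OffsetHeaderDemo {
    // Equivalent to the per-slice wrap in extractColumnOffsets(): ByteBuffer's
    // default byte order is big-endian, so absolute getLong() reads recover the
    // same values without copying 8-byte slices.
    static long[] decode(byte[] offsetHeader, int varcount) {
        long[] offsets = new long[varcount];
        ByteBuffer buf = ByteBuffer.wrap(offsetHeader);
        for (int i = 0; i < varcount; i++) {
            offsets[i] = buf.getLong(i * 8);
        }
        return offsets;
    }
}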
- - byte[][] bufferedColumns = new byte [varcount][MAX_COLUMN_BUFFER]; - - // read the tab-delimited file: - - try (FileInputStream tabfileStream = new FileInputStream(tabfile); - Scanner scanner = new Scanner(tabfileStream)) { - scanner.useDelimiter("\\n"); - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - if (scanner.hasNext()) { - String[] line = (scanner.next()).split("\t", -1); - // TODO: throw an exception if there are fewer tab-delimited - // tokens than the number of variables specified. - String token = ""; - int tokensize = 0; - for (int varindex = 0; varindex < varcount; varindex++) { - // TODO: figure out the safest way to convert strings to - // bytes here. Is it going to be safer to use getBytes("UTF8")? - // we are already making the assumption that the values - // in the tab file are in UTF8. -- L.A. - token = line[varindex] + "\n"; - tokensize = token.getBytes().length; - if (bufferedSizes[varindex] + tokensize > MAX_COLUMN_BUFFER) { - // fill the buffer and dump its contents into the temp file: - // (do note that there may be *several* MAX_COLUMN_BUFFERs - // worth of bytes in the token!) - - int tokenoffset = 0; - - if (bufferedSizes[varindex] != MAX_COLUMN_BUFFER) { - tokenoffset = MAX_COLUMN_BUFFER - bufferedSizes[varindex]; - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokenoffset); - } // (otherwise the buffer is already full, and we should - // simply dump it into the temp file, without adding any - // extra bytes to it) - - File bufferTempFile = columnTempFiles[varindex]; - if (bufferTempFile == null) { - bufferTempFile = File.createTempFile("columnBufferFile", "bytes"); - columnTempFiles[varindex] = bufferTempFile; - } - - // *append* the contents of the buffer to the end of the - // temp file, if already exists: - try (BufferedOutputStream outputStream = new BufferedOutputStream( - new FileOutputStream(bufferTempFile, true))) { - outputStream.write(bufferedColumns[varindex], 0, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - - // keep writing MAX_COLUMN_BUFFER-size chunks of bytes into - // the temp file, for as long as there's more than MAX_COLUMN_BUFFER - // bytes left in the token: - - while (tokensize - tokenoffset > MAX_COLUMN_BUFFER) { - outputStream.write(token.getBytes(), tokenoffset, MAX_COLUMN_BUFFER); - cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; - tokenoffset += MAX_COLUMN_BUFFER; - } - - } - - // buffer the remaining bytes and reset the buffered - // byte counter: - - System.arraycopy(token.getBytes(), - tokenoffset, - bufferedColumns[varindex], - 0, - tokensize - tokenoffset); - - bufferedSizes[varindex] = tokensize - tokenoffset; - - } else { - // continue buffering - System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokensize); - bufferedSizes[varindex] += tokensize; - } - } - } else { - throw new IOException("Tab file has fewer rows than the stored number of cases!"); - } - } - } - - // OK, we've created the individual byte vectors of the tab file columns; - // they may be partially saved in temp files and/or in memory. - // We now need to go through all these buffers and create the final - // rotated image file. - - try (BufferedOutputStream finalOut = new BufferedOutputStream( - new FileOutputStream(new File(rotatedImageFileName)))) { - - // but first we should create the offset header and write it out into - // the final file; because it should be at the head, doh! 
- - long columnOffset = varcount * 8; - // (this is the offset of the first column vector; it is equal to the - // size of the offset header, i.e. varcount * 8 bytes) - - for (int varindex = 0; varindex < varcount; varindex++) { - long totalColumnBytes = cachedfileSizes[varindex] + bufferedSizes[varindex]; - columnOffset += totalColumnBytes; - // totalColumnBytes; - byte[] columnOffsetByteArray = ByteBuffer.allocate(8).putLong(columnOffset).array(); - System.arraycopy(columnOffsetByteArray, 0, offsetHeader, varindex * 8, 8); - } - - finalOut.write(offsetHeader, 0, varcount * 8); - - for (int varindex = 0; varindex < varcount; varindex++) { - long cachedBytesRead = 0; - - // check if there is a cached temp file: - - File cachedTempFile = columnTempFiles[varindex]; - if (cachedTempFile != null) { - byte[] cachedBytes = new byte[MAX_COLUMN_BUFFER]; - try (BufferedInputStream cachedIn = new BufferedInputStream(new FileInputStream(cachedTempFile))) { - int readlen = 0; - while ((readlen = cachedIn.read(cachedBytes)) > -1) { - finalOut.write(cachedBytes, 0, readlen); - cachedBytesRead += readlen; - } - } - - // delete the temp file: - cachedTempFile.delete(); - - } - - if (cachedBytesRead != cachedfileSizes[varindex]) { - throw new IOException("Could not read the correct number of bytes cached for column "+varindex+"; "+ - cachedfileSizes[varindex] + " bytes expected, "+cachedBytesRead+" read."); - } - - // then check if there are any bytes buffered for this column: - - if (bufferedSizes[varindex] > 0) { - finalOut.write(bufferedColumns[varindex], 0, bufferedSizes[varindex]); - } - - } - } - - return new File(rotatedImageFileName); - - } - - /* - * Test method for taking a "rotated" image, and reversing it, reassembling - * all the columns in the original order. Which should result in a file - * byte-for-byte identical file to the original tab-delimited version. - * - * (do note that this method is not efficiently implemented; it's only - * being used for experiments so far, to confirm the accuracy of the - * accuracy of generateRotatedImage(). It should not be used for any - * practical means in the application!) 
- */ - private void reverseRotatedImage (File rotfile, int varcount, int casecount) throws IOException { - // open the file, read in the offset header: - try (BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotfile))) { - byte[] offsetHeader = new byte[varcount * 8]; - long[] byteOffsets = new long[varcount]; - - int readlen = rotfileStream.read(offsetHeader); - - if (readlen != varcount * 8) { - throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file."); - } - - for (int varindex = 0; varindex < varcount; varindex++) { - byte[] offsetBytes = new byte[8]; - System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8); - - ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); - byteOffsets[varindex] = offsetByteBuffer.getLong(); - - //System.out.println(byteOffsets[varindex]); - } - - String [][] reversedMatrix = new String[casecount][varcount]; - - long offset = varcount * 8; - byte[] columnBytes; - - for (int varindex = 0; varindex < varcount; varindex++) { - long columnLength = byteOffsets[varindex] - offset; - - - - columnBytes = new byte[(int)columnLength]; - readlen = rotfileStream.read(columnBytes); - - if (readlen != columnLength) { - throw new IOException ("Could not read "+columnBytes+" bytes for column "+varindex); - } - /* - String columnString = new String(columnBytes); - //System.out.print(columnString); - String[] values = columnString.split("\n", -1); - - if (values.length < casecount) { - throw new IOException("count mismatch: "+values.length+" tokens found for column "+varindex); - } - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - reversedMatrix[caseindex][varindex] = values[caseindex]; - }*/ - - int bytecount = 0; - int byteoffset = 0; - int caseindex = 0; - //System.out.println("generating value vector for column "+varindex); - while (bytecount < columnLength) { - if (columnBytes[bytecount] == '\n') { - String token = new String(columnBytes, byteoffset, bytecount-byteoffset); - reversedMatrix[caseindex++][varindex] = token; - byteoffset = bytecount + 1; - } - bytecount++; - } - - if (caseindex != casecount) { - throw new IOException("count mismatch: "+caseindex+" tokens found for column "+varindex); - } - offset = byteOffsets[varindex]; - } - - for (int caseindex = 0; caseindex < casecount; caseindex++) { - for (int varindex = 0; varindex < varcount; varindex++) { - System.out.print(reversedMatrix[caseindex][varindex]); - if (varindex < varcount-1) { - System.out.print("\t"); - } else { - System.out.print("\n"); - } - } - } - - } - - - } - - /** - * main() method, for testing - * usage: java edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator testfile.tab varcount casecount column type - * make sure the CLASSPATH contains ... 
- * - */ - - public static void main(String[] args) { - - String tabFileName = args[0]; - int varcount = new Integer(args[1]).intValue(); - int casecount = new Integer(args[2]).intValue(); - int column = new Integer(args[3]).intValue(); - String type = args[4]; - - File tabFile = new File(tabFileName); - File rotatedImageFile = null; - - TabularSubsetGenerator subsetGenerator = new TabularSubsetGenerator(); - - /* - try { - rotatedImageFile = subsetGenerator.getRotatedImage(tabFile, varcount, casecount); - } catch (IOException ex) { - System.out.println(ex.getMessage()); - } - */ - - //System.out.println("\nFinished generating \"rotated\" column image file."); - - //System.out.println("\nOffsets:"); - - MathContext doubleMathContext = new MathContext(15, RoundingMode.HALF_EVEN); - String FORMAT_IEEE754 = "%+#.15e"; - - try { - //subsetGenerator.reverseRotatedImage(rotatedImageFile, varcount, casecount); - //String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount); - if ("string".equals(type)) { - String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount); - for (int i = 0; i < casecount; i++) { - System.out.println(columns[i]); - } - } else { - - Double[] columns = subsetGenerator.subsetDoubleVector(tabFile, column, varcount, casecount); - for (int i = 0; i < casecount; i++) { - if (columns[i] != null) { - BigDecimal outBigDecimal = new BigDecimal(columns[i], doubleMathContext); - System.out.println(String.format(FORMAT_IEEE754, outBigDecimal)); - } else { - System.out.println("NA"); - } - //System.out.println(columns[i]); - } - } - } catch (IOException ex) { - System.out.println(ex.getMessage()); - } - } -} - - + scanner.next(); + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java deleted file mode 100644 index 89e033353c1..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetInputStream.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
- */ - -package edu.harvard.iq.dataverse.dataaccess; - -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.datavariable.DataVariable; -import java.io.IOException; -import java.io.InputStream; -import java.util.List; -import java.util.logging.Logger; - -/** - * - * @author Leonid Andreev - */ -public class TabularSubsetInputStream extends InputStream { - private static final Logger logger = Logger.getLogger(TabularSubsetInputStream.class.getCanonicalName()); - - private TabularSubsetGenerator subsetGenerator = null; - private int numberOfSubsetVariables; - private int numberOfObservations; - private int numberOfObservationsRead = 0; - private byte[] leftoverBytes = null; - - public TabularSubsetInputStream(DataFile datafile, List variables) throws IOException { - if (datafile == null) { - throw new IOException("Null datafile in subset request"); - } - if (!datafile.isTabularData()) { - throw new IOException("Subset requested on a non-tabular data file"); - } - numberOfObservations = datafile.getDataTable().getCaseQuantity().intValue(); - - if (variables == null || variables.size() < 1) { - throw new IOException("Null or empty list of variables in subset request."); - } - numberOfSubsetVariables = variables.size(); - subsetGenerator = new TabularSubsetGenerator(datafile, variables); - - } - - //@Override - public int read() throws IOException { - throw new IOException("read() method not implemented; do not use."); - } - - //@Override - public int read(byte[] b) throws IOException { - // TODO: - // Move this code into TabularSubsetGenerator - logger.fine("subset input stream: read request, on a "+b.length+" byte buffer;"); - - if (numberOfSubsetVariables == 1) { - logger.fine("calling the single variable subset read method"); - return subsetGenerator.readSingleColumnSubset(b); - } - - int bytesread = 0; - byte [] linebuffer; - - // do we have a leftover? - if (leftoverBytes != null) { - if (leftoverBytes.length < b.length) { - System.arraycopy(leftoverBytes, 0, b, 0, leftoverBytes.length); - bytesread = leftoverBytes.length; - leftoverBytes = null; - - } else { - // shouldn't really happen... unless it's a very large subset, - // or a very long string, etc. - System.arraycopy(leftoverBytes, 0, b, 0, b.length); - byte[] tmp = new byte[leftoverBytes.length - b.length]; - System.arraycopy(leftoverBytes, b.length, tmp, 0, leftoverBytes.length - b.length); - leftoverBytes = tmp; - tmp = null; - return b.length; - } - } - - while (bytesread < b.length && numberOfObservationsRead < numberOfObservations) { - linebuffer = subsetGenerator.readSubsetLineBytes(); - numberOfObservationsRead++; - - if (bytesread + linebuffer.length < b.length) { - // copy linebuffer into the return buffer: - System.arraycopy(linebuffer, 0, b, bytesread, linebuffer.length); - bytesread += linebuffer.length; - } else { - System.arraycopy(linebuffer, 0, b, bytesread, b.length - bytesread); - // save the leftover; - if (bytesread + linebuffer.length > b.length) { - leftoverBytes = new byte[bytesread + linebuffer.length - b.length]; - System.arraycopy(linebuffer, b.length - bytesread, leftoverBytes, 0, bytesread + linebuffer.length - b.length); - } - return b.length; - } - } - - // and this means we've reached the end of the tab file! - - return bytesread > 0 ? 
bytesread : -1; - } - - //@Override - public void close() { - if (subsetGenerator != null) { - subsetGenerator.close(); - } - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleException.java b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleException.java index 3329d92b7a9..474674bda73 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleException.java +++ b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleException.java @@ -1,7 +1,9 @@ package edu.harvard.iq.dataverse.datacapturemodule; +@Deprecated(forRemoval = true, since = "2024-07-07") public class DataCaptureModuleException extends Exception { + @Deprecated(forRemoval = true, since = "2024-07-07") public DataCaptureModuleException(String message, Throwable cause) { super(message, cause); } diff --git a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java index 460e4727afc..094d3976133 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/datacapturemodule/DataCaptureModuleUtil.java @@ -12,10 +12,12 @@ import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; +@Deprecated(forRemoval = true, since = "2024-07-07") public class DataCaptureModuleUtil { private static final Logger logger = Logger.getLogger(DataCaptureModuleUtil.class.getCanonicalName()); + @Deprecated(forRemoval = true, since = "2024-07-07") public static boolean rsyncSupportEnabled(String uploadMethodsSettings) { logger.fine("uploadMethodsSettings: " + uploadMethodsSettings);; if (uploadMethodsSettings==null){ @@ -28,6 +30,7 @@ public static boolean rsyncSupportEnabled(String uploadMethodsSettings) { /** * generate JSON to send to DCM */ + @Deprecated(forRemoval = true, since = "2024-07-07") public static JsonObject generateJsonForUploadRequest(AuthenticatedUser user, Dataset dataset) { JsonObjectBuilder jab = Json.createObjectBuilder(); // The general rule should be to always pass the user id and dataset identifier to the DCM. 
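For context on the @Deprecated(forRemoval = true, ...) annotations being added throughout these DCM classes: downstream callers now get javac's stronger "deprecated and marked for removal" warning at every call site. A hypothetical migration-period caller might acknowledge it like this; only the DataCaptureModuleUtil API is real here, the caller class is illustrative.

import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil;

public class DcmCaller {
    // Acknowledge the pending removal explicitly while migration is underway;
    // without the suppression, javac flags this call site with a
    // "deprecated and marked for removal" warning.
    @SuppressWarnings("removal")
    boolean rsyncEnabled(String uploadMethodsSettings) {
        return DataCaptureModuleUtil.rsyncSupportEnabled(uploadMethodsSettings);
    }
}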
@@ -39,6 +42,7 @@ public static JsonObject generateJsonForUploadRequest(AuthenticatedUser user, Da /** * transfer script from DCM */ + @Deprecated(forRemoval = true, since = "2024-07-07") public static ScriptRequestResponse getScriptFromRequest(HttpResponse uploadRequest) { int status = uploadRequest.getStatus(); JsonNode body = uploadRequest.getBody(); @@ -54,6 +58,7 @@ public static ScriptRequestResponse getScriptFromRequest(HttpResponse return scriptRequestResponse; } + @Deprecated(forRemoval = true, since = "2024-07-07") static UploadRequestResponse makeUploadRequest(HttpResponse uploadRequest) { int status = uploadRequest.getStatus(); String body = uploadRequest.getBody(); @@ -61,6 +66,7 @@ static UploadRequestResponse makeUploadRequest(HttpResponse uploadReques return new UploadRequestResponse(uploadRequest.getStatus(), body); } + @Deprecated(forRemoval = true, since = "2024-07-07") public static String getMessageFromException(DataCaptureModuleException ex) { if (ex == null) { return "DataCaptureModuleException was null!"; @@ -76,6 +82,7 @@ public static String getMessageFromException(DataCaptureModuleException ex) { return message + " was caused by " + cause.getMessage(); } + @Deprecated(forRemoval = true, since = "2024-07-07") public static String getScriptName(DatasetVersion datasetVersion) { return "upload-" + datasetVersion.getDataset().getIdentifier().replace("/", "_") + ".bash"; } diff --git a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java index 096f1f87acc..98bd26b51d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataset/DatasetUtil.java @@ -1,17 +1,14 @@ package edu.harvard.iq.dataverse.dataset; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.TermsOfUseAndAccess; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import static edu.harvard.iq.dataverse.api.ApiConstants.DS_VERSION_DRAFT; import static edu.harvard.iq.dataverse.dataaccess.DataAccess.getStorageIO; + +import edu.harvard.iq.dataverse.dataaccess.InputStreamIO; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -31,15 +28,14 @@ import java.util.*; import java.util.logging.Logger; import javax.imageio.ImageIO; + +import jakarta.enterprise.inject.spi.CDI; import org.apache.commons.io.IOUtils; -import static edu.harvard.iq.dataverse.dataaccess.DataAccess.getStorageIO; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.license.License; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.StringUtil; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.EnumUtils; @@ -218,7 +214,8 @@ public static boolean 
deleteDatasetLogo(Dataset dataset) { storageIO.deleteAuxObject(datasetLogoThumbnail + thumbExtension + ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); } catch (IOException ex) { - logger.info("Failed to delete dataset logo: " + ex.getMessage()); + logger.fine("Failed to delete dataset logo: " + ex.getMessage() + + " (this is most likely harmless; this method is often called without checking if the custom dataset logo was in fact present)"); return false; } return true; @@ -293,7 +290,7 @@ public static Dataset persistDatasetLogoToStorageAndCreateThumbnails(Dataset dat dataAccess = DataAccess.getStorageIO(dataset); } catch(IOException ioex){ - //TODO: Add a suitable waing message + //TODO: Add a suitable warning message logger.warning("Failed to save the file, storage id " + dataset.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); } @@ -355,30 +352,44 @@ public static Dataset persistDatasetLogoToStorageAndCreateThumbnails(Dataset dat // We'll try to pre-generate the rescaled versions in both the // DEFAULT_DATASET_LOGO (currently 140) and DEFAULT_CARDIMAGE_SIZE (48) String thumbFileLocation = ImageThumbConverter.rescaleImage(fullSizeImage, width, height, ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE, tmpFileForResize.toPath().toString()); - logger.fine("thumbFileLocation = " + thumbFileLocation); - logger.fine("tmpFileLocation=" + tmpFileForResize.toPath().toString()); - //now we must save the updated thumbnail - try { - dataAccess.savePathAsAux(Paths.get(thumbFileLocation), datasetLogoThumbnail+thumbExtension+ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE); - } catch (IOException ex) { - logger.severe("Failed to move updated thumbnail file from " + tmpFile.getAbsolutePath() + " to its DataAccess location" + ": " + ex); + if (thumbFileLocation == null) { + logger.warning("Rescaling the thumbnail image for the dataset logo failed"); + dataset.setPreviewImageAvailable(false); + dataset.setUseGenericThumbnail(true); + } else { + logger.fine("thumbFileLocation = " + thumbFileLocation); + logger.fine("tmpFileLocation=" + tmpFileForResize.toPath().toString()); + //now we must save the updated thumbnail + try { + dataAccess.savePathAsAux(Paths.get(thumbFileLocation), datasetLogoThumbnail + thumbExtension + ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE); + } catch (IOException ex) { + logger.severe("Failed to move updated thumbnail file from " + tmpFile.getAbsolutePath() + " to its DataAccess location" + ": " + ex); + } } thumbFileLocation = ImageThumbConverter.rescaleImage(fullSizeImage, width, height, ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE, tmpFileForResize.toPath().toString()); - logger.fine("thumbFileLocation = " + thumbFileLocation); - logger.fine("tmpFileLocation=" + tmpFileForResize.toPath().toString()); - //now we must save the updated thumbnail - try { - dataAccess.savePathAsAux(Paths.get(thumbFileLocation), datasetLogoThumbnail+thumbExtension+ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); - } catch (IOException ex) { - logger.severe("Failed to move updated thumbnail file from " + tmpFile.getAbsolutePath() + " to its DataAccess location" + ": " + ex); + if (thumbFileLocation == null) { + logger.warning("Rescaling the thumbnail image for the dataset card failed"); + dataset.setPreviewImageAvailable(false); + dataset.setUseGenericThumbnail(true); + } else { + logger.fine("thumbFileLocation = " + thumbFileLocation); + logger.fine("tmpFileLocation=" + tmpFileForResize.toPath().toString()); + //now we must save the updated thumbnail + try { + dataAccess.savePathAsAux(Paths.get(thumbFileLocation), datasetLogoThumbnail +
thumbExtension + ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); + } catch (IOException ex) { + logger.severe("Failed to move updated thumbnail file from " + tmpFile.getAbsolutePath() + " to its DataAccess location" + ": " + ex); + } } //This deletes the tempfiles created for rescaling and encoding boolean tmpFileWasDeleted = tmpFile.delete(); boolean originalTempFileWasDeleted = tmpFileForResize.delete(); try { - Files.delete(Paths.get(thumbFileLocation)); + if (thumbFileLocation != null) { + Files.delete(Paths.get(thumbFileLocation)); + } } catch (IOException ioex) { logger.fine("Failed to delete temporary thumbnail file"); } @@ -411,6 +422,80 @@ public static InputStream getThumbnailAsInputStream(Dataset dataset, int size) { return nonDefaultDatasetThumbnail; } } + + public static InputStream getLogoAsInputStream(Dataset dataset) { + if (dataset == null) { + return null; + } + StorageIO dataAccess = null; + + try { + dataAccess = DataAccess.getStorageIO(dataset); + } catch (IOException ioex) { + logger.warning("getLogo(): Failed to initialize dataset StorageIO for " + dataset.getStorageIdentifier() + + " (" + ioex.getMessage() + ")"); + } + + InputStream in = null; + try { + if (dataAccess == null) { + logger.warning( + "getLogo(): Failed to initialize dataset StorageIO for " + dataset.getStorageIdentifier()); + } else { + in = dataAccess.getAuxFileAsInputStream(datasetLogoFilenameFinal); + } + } catch (IOException ex) { + logger.fine( + "Dataset-level thumbnail file does not exist, or failed to open; will try to find an image file that can be used as the thumbnail."); + } + + if (in == null) { + DataFile thumbnailFile = dataset.getThumbnailFile(); + + if (thumbnailFile == null) { + if (dataset.isUseGenericThumbnail()) { + logger.fine("Dataset (id :" + dataset.getId() + ") does not have a logo and is 'Use Generic'."); + return null; + } else { + thumbnailFile = attemptToAutomaticallySelectThumbnailFromDataFiles(dataset, null); + if (thumbnailFile == null) { + logger.fine("Dataset (id :" + dataset.getId() + + ") does not have a logo available that could be selected automatically."); + return null; + } else { + + } + } + } + if (thumbnailFile.isRestricted()) { + logger.fine("Dataset (id :" + dataset.getId() + + ") has a logo the user selected but the file must have later been restricted. Returning null."); + return null; + } + + try { + + boolean origImageFailed = thumbnailFile.isPreviewImageFail(); + InputStreamIO isIO = ImageThumbConverter.getImageThumbnailAsInputStream(thumbnailFile.getStorageIO(), + ImageThumbConverter.DEFAULT_DATASETLOGO_SIZE); + if (!origImageFailed && thumbnailFile.isPreviewImageFail()) { + // We found an older 0 length thumbnail. Newer image uploads will not have this issue. + // Once cleaned up, this thumbnail will no longer have this issue + // ImageThumbConverter fixed the DataFile + // Now we need to update dataset since this is a bad logo + DatasetServiceBean datasetService = CDI.current().select(DatasetServiceBean.class).get(); + datasetService.clearDatasetLevelThumbnail(dataset); + } + in = isIO != null ? 
isIO.getInputStream() : null; + } catch (IOException ioex) { + logger.warning("getLogo(): Failed to get logo from DataFile for " + dataset.getStorageIdentifier() + + " (" + ioex.getMessage() + ")"); + ioex.printStackTrace(); + } + + } + return in; + } /** * The dataset logo is the file that a user uploads which is *not* one of diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index d44388f39f7..0143fced87c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -61,6 +61,7 @@ import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import org.apache.commons.io.IOUtils; import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDataFilesCommand; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; /** @@ -1212,9 +1213,9 @@ private boolean step_030_createNewFilesViaIngest(){ this.newCheckSumType, this.systemConfig);*/ - DataFileServiceBean.UserStorageQuota quota = null; + UploadSessionQuotaLimit quota = null; if (systemConfig.isStorageQuotasEnforced()) { - quota = fileService.getUserStorageQuota(dvRequest.getAuthenticatedUser(), dataset); + quota = fileService.getUploadSessionQuotaLimit(dataset); } Command cmd = new CreateNewDataFilesCommand(dvRequest, workingVersion, newFileInputStream, newFileName, newFileContentType, newStorageIdentifier, quota, newCheckSum, newCheckSumType); CreateDataFileResult createDataFilesResult = commandEngine.submit(cmd); diff --git a/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java b/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java index 29e821c28a4..147c2c004db 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadata.java @@ -71,6 +71,7 @@ public class VariableMetadata implements Serializable { /** * universe: metadata variable field. 
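getLogoAsInputStream() above obtains DatasetServiceBean via CDI.current() because static utility methods cannot use field injection. As a general sketch of that lookup pattern, assuming a running Jakarta EE container:

import edu.harvard.iq.dataverse.DatasetServiceBean;
import jakarta.enterprise.inject.spi.CDI;

public final class BeanLookup {
    // Programmatic stand-in for @Inject/@EJB, usable from static contexts;
    // CDI.current() throws IllegalStateException outside a running container.
    static DatasetServiceBean lookupDatasetService() {
        return CDI.current().select(DatasetServiceBean.class).get();
    }
}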
*/ + @Column(columnDefinition="TEXT") private String universe; /** diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java index 55a375acb6c..96330271367 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/CommandContext.java @@ -1,9 +1,7 @@ package edu.harvard.iq.dataverse.engine.command; -import edu.harvard.iq.dataverse.DOIDataCiteServiceBean; -import edu.harvard.iq.dataverse.DOIEZIdServiceBean; -import edu.harvard.iq.dataverse.HandlenetServiceBean; import edu.harvard.iq.dataverse.DataFileServiceBean; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.DatasetVersionServiceBean; @@ -18,6 +16,7 @@ import edu.harvard.iq.dataverse.FileDownloadServiceBean; import edu.harvard.iq.dataverse.GuestbookResponseServiceBean; import edu.harvard.iq.dataverse.GuestbookServiceBean; +import edu.harvard.iq.dataverse.MetadataBlockServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.PermissionServiceBean; import edu.harvard.iq.dataverse.RoleAssigneeServiceBean; @@ -32,13 +31,13 @@ import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleServiceBean; import edu.harvard.iq.dataverse.engine.DataverseEngine; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.pidproviders.FakePidProviderServiceBean; -import edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.search.IndexBatchServiceBean; import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; import java.util.Stack; @@ -99,15 +98,7 @@ public interface CommandContext { public DataverseFieldTypeInputLevelServiceBean fieldTypeInputLevels(); - public DOIEZIdServiceBean doiEZId(); - - public DOIDataCiteServiceBean doiDataCite(); - - public FakePidProviderServiceBean fakePidProvider(); - - public HandlenetServiceBean handleNet(); - - public PermaLinkPidProviderServiceBean permaLinkProvider(); + public PidProviderFactoryBean pidProviderFactory(); public GuestbookServiceBean guestbooks(); @@ -126,6 +117,8 @@ public interface CommandContext { public UserNotificationServiceBean notifications(); public AuthenticationServiceBean authentication(); + + public StorageUseServiceBean storageUse(); public SystemConfig systemConfig(); @@ -142,7 +135,9 @@ public interface CommandContext { public ConfirmEmailServiceBean confirmEmail(); public ActionLogServiceBean actionLog(); - + + public MetadataBlockServiceBean metadataBlocks(); + public void beginCommandSequence(); public boolean completeCommandSequence(Command command); @@ -152,4 +147,6 @@ public interface CommandContext { public Stack getCommandsCalled(); public void addCommand(Command command); + + public DatasetFieldServiceBean dsField(); } diff --git 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/RateLimitCommandException.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/RateLimitCommandException.java new file mode 100644 index 00000000000..99a665b31ac --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/exception/RateLimitCommandException.java @@ -0,0 +1,16 @@ +package edu.harvard.iq.dataverse.engine.command.exception; + +import edu.harvard.iq.dataverse.engine.command.Command; + +/** + * An exception raised when a command cannot be executed, due to the + * issuing user being rate limited. + * + * @author + */ +public class RateLimitCommandException extends CommandException { + + public RateLimitCommandException(String message, Command aCommand) { + super(message, aCommand); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java index 303d8e1c25f..ab78a88c9a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java @@ -1,24 +1,19 @@ package edu.harvard.iq.dataverse.engine.command.impl; -import edu.harvard.iq.dataverse.*; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.dataaccess.DataAccess; -import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleUtil; -import edu.harvard.iq.dataverse.datacapturemodule.ScriptRequestResponse; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; import static edu.harvard.iq.dataverse.util.StringUtil.isEmpty; -import java.io.IOException; import java.util.Objects; -import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.solr.client.solrj.SolrServerException; /**; * An abstract base class for commands that creates {@link Dataset}s. 
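A brief sketch of how the new RateLimitCommandException might be raised by engine code. The guard method and its overLimit parameter are hypothetical; only the exception's (message, command) constructor comes from the file added above.

import edu.harvard.iq.dataverse.engine.command.Command;
import edu.harvard.iq.dataverse.engine.command.exception.RateLimitCommandException;

class RateLimitGuard {
    // The rate-limit check itself is not part of this diff; this only shows
    // how the exception type carries the message and the offending command.
    static void requireWithinRateLimit(boolean overLimit, Command<?> command)
            throws RateLimitCommandException {
        if (overLimit) {
            throw new RateLimitCommandException("Rate limited; please try again later.", command);
        }
    }
}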
@@ -81,9 +76,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { additionalParameterTests(ctxt); Dataset theDataset = getDataset(); - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); + PidProvider pidProvider = ctxt.dvObjects().getEffectivePidGenerator(theDataset); + if ( isEmpty(theDataset.getIdentifier()) ) { - theDataset.setIdentifier(idServiceBean.generateDatasetIdentifier(theDataset)); + pidProvider.generatePid(theDataset); } DatasetVersion dsv = getVersionToPersist(theDataset); @@ -94,6 +90,8 @@ public Dataset execute(CommandContext ctxt) throws CommandException { if(!harvested) { checkSystemMetadataKeyIfNeeded(dsv, null); } + + registerExternalVocabValuesIfAny(ctxt, dsv); theDataset.setCreator((AuthenticatedUser) getRequest().getUser()); @@ -105,19 +103,18 @@ public Dataset execute(CommandContext ctxt) throws CommandException { dataFile.setCreateDate(theDataset.getCreateDate()); } - String nonNullDefaultIfKeyNotFound = ""; if (theDataset.getProtocol()==null) { - theDataset.setProtocol(ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound)); + theDataset.setProtocol(pidProvider.getProtocol()); } if (theDataset.getAuthority()==null) { - theDataset.setAuthority(ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound)); + theDataset.setAuthority(pidProvider.getAuthority()); } if (theDataset.getStorageIdentifier() == null) { String driverId = theDataset.getEffectiveStorageDriverId(); theDataset.setStorageIdentifier(driverId + DataAccess.SEPARATOR + theDataset.getAuthorityForFileStorage() + "/" + theDataset.getIdentifierForFileStorage()); } if (theDataset.getIdentifier()==null) { - theDataset.setIdentifier(idServiceBean.generateDatasetIdentifier(theDataset)); + pidProvider.generatePid(theDataset); } // Attempt the registration if importing dataset through the API, or the app (but not harvest) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java index 6061461306d..1a1f4f9318b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractDatasetCommand.java @@ -2,10 +2,13 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetVersionDifference; import edu.harvard.iq.dataverse.DatasetVersionUser; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.MetadataBlock; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; @@ -13,6 +16,8 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import java.sql.Timestamp; @@ -22,10 +27,8 @@ import java.util.logging.Logger; import static 
java.util.stream.Collectors.joining; +import jakarta.ejb.EJB; import jakarta.validation.ConstraintViolation; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; -import edu.harvard.iq.dataverse.MetadataBlock; -import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.settings.JvmSettings; /** @@ -152,18 +155,18 @@ protected void validateOrDie(DatasetVersion dsv, Boolean lenient) throws Command */ protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctxt, boolean retry) throws CommandException { if (!theDataset.isIdentifierRegistered()) { - GlobalIdServiceBean globalIdServiceBean = GlobalIdServiceBean.getBean(theDataset.getProtocol(), ctxt); - if ( globalIdServiceBean != null ) { + PidProvider pidProvider = PidUtil.getPidProvider(theDataset.getGlobalId().getProviderId()); + if ( pidProvider != null ) { try { - if (globalIdServiceBean.alreadyRegistered(theDataset)) { + if (pidProvider.alreadyRegistered(theDataset)) { int attempts = 0; if(retry) { do { - theDataset.setIdentifier(globalIdServiceBean.generateDatasetIdentifier(theDataset)); + pidProvider.generatePid(theDataset); logger.log(Level.INFO, "Attempting to register external identifier for dataset {0} (trying: {1}).", new Object[]{theDataset.getId(), theDataset.getIdentifier()}); attempts++; - } while (globalIdServiceBean.alreadyRegistered(theDataset) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT); + } while (pidProvider.alreadyRegistered(theDataset) && attempts <= FOOLPROOF_RETRIAL_ATTEMPTS_LIMIT); } if(!retry) { logger.warning("Reserving PID for: " + getDataset().getId() + " during publication failed."); @@ -177,7 +180,7 @@ protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctx } // Invariant: Dataset identifier does not exist in the remote registry try { - globalIdServiceBean.createIdentifier(theDataset); + pidProvider.createIdentifier(theDataset); theDataset.setGlobalIdCreateTime(getTimestamp()); theDataset.setIdentifierRegistered(true); } catch (Throwable ex) { @@ -185,7 +188,7 @@ protected void registerExternalIdentifier(Dataset theDataset, CommandContext ctx } } catch (Throwable e) { - throw new CommandException(BundleUtil.getStringFromBundle("dataset.publish.error", globalIdServiceBean.getProviderInformation()), this); + throw new CommandException(BundleUtil.getStringFromBundle("dataset.publish.error", pidProvider.getProviderInformation()), this); } } else { throw new IllegalCommandException("This dataset may not be published because its id registry service is not supported.", this); @@ -230,4 +233,13 @@ protected void checkSystemMetadataKeyIfNeeded(DatasetVersion newVersion, Dataset } } } + + protected void registerExternalVocabValuesIfAny(CommandContext ctxt, DatasetVersion newVersion) { + for (DatasetField df : newVersion.getFlatDatasetFields()) { + logger.fine("Found id: " + df.getDatasetFieldType().getId()); + if (ctxt.dsField().getCVocConf(true).containsKey(df.getDatasetFieldType().getId())) { + ctxt.dsField().registerExternalVocabValues(df); + } + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractGetPublishedFileMetadataCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractGetPublishedFileMetadataCommand.java new file mode 100644 index 00000000000..4fef2c27efb --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractGetPublishedFileMetadataCommand.java @@ -0,0 +1,39 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import 
edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; + +@RequiredPermissions({}) +abstract class AbstractGetPublishedFileMetadataCommand extends AbstractCommand<FileMetadata> { + protected final DataFile dataFile; + protected final boolean includeDeaccessioned; + + public AbstractGetPublishedFileMetadataCommand(DataverseRequest request, DataFile dataFile, boolean includeDeaccessioned) { + super(request, dataFile); + this.dataFile = dataFile; + this.includeDeaccessioned = includeDeaccessioned; + } + + protected FileMetadata getLatestPublishedFileMetadata(CommandContext ctxt) { + return dataFile.getFileMetadatas().stream().filter(fileMetadata -> { + DatasetVersion.VersionState versionState = fileMetadata.getDatasetVersion().getVersionState(); + return (!versionState.equals(DatasetVersion.VersionState.DRAFT) + && isDatasetVersionAccessible(fileMetadata.getDatasetVersion(), dataFile.getOwner(), ctxt)); + }).reduce(null, DataFile::getTheNewerFileMetadata); + } + + protected boolean isDatasetVersionAccessible(DatasetVersion datasetVersion, Dataset ownerDataset, CommandContext ctxt) { + return datasetVersion.isReleased() || isDatasetVersionDeaccessionedAndAccessible(datasetVersion, ownerDataset, ctxt); + } + + private boolean isDatasetVersionDeaccessionedAndAccessible(DatasetVersion datasetVersion, Dataset ownerDataset, CommandContext ctxt) { + return includeDeaccessioned && datasetVersion.isDeaccessioned() && ctxt.permissions().requestOn(getRequest(), ownerDataset).has(Permission.EditDataset); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java index b988fd05f03..29c27d0396d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractSubmitToArchiveCommand.java @@ -1,6 +1,5 @@ package edu.harvard.iq.dataverse.engine.command.impl; -import edu.harvard.iq.dataverse.DOIDataCiteRegisterService; import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; @@ -14,6 +13,7 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DOIDataCiteRegisterService; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.bagit.BagGenerator; import edu.harvard.iq.dataverse.util.bagit.OREMap; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AssignRoleCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AssignRoleCommand.java index 5577d541012..121af765737 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AssignRoleCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AssignRoleCommand.java @@ -3,7 +3,6 @@
package edu.harvard.iq.dataverse.engine.command.impl; -import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.DataverseRole; @@ -18,7 +17,10 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.util.BundleUtil; + import java.util.Collections; +import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -67,17 +69,38 @@ public RoleAssignment execute(CommandContext ctxt) throws CommandException { throw new IllegalCommandException("User " + user.getUserIdentifier() + " is deactivated and cannot be given a role.", this); } } + if(isExistingRole(ctxt)){ + throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasets.api.grant.role.assignee.has.role.error"), this); + } // TODO make sure the role is defined on the dataverse. RoleAssignment roleAssignment = new RoleAssignment(role, grantee, defPoint, privateUrlToken, anonymizedAccess); return ctxt.roles().save(roleAssignment); } + private boolean isExistingRole(CommandContext ctxt) { + return ctxt.roles() + .directRoleAssignments(grantee, defPoint) + .stream() + .map(RoleAssignment::getRole) + .anyMatch(it -> it.equals(role)); + } + @Override public Map<String, Set<Permission>> getRequiredPermissions() { // for data file check permission on owning dataset - return Collections.singletonMap("", - defPoint instanceof Dataverse ? Collections.singleton(Permission.ManageDataversePermissions) - : defPoint instanceof Dataset ? Collections.singleton(Permission.ManageDatasetPermissions) : Collections.singleton(Permission.ManageFilePermissions)); + Set<Permission> requiredPermissions = new HashSet<>(); + + if (defPoint instanceof Dataverse) { + requiredPermissions.add(Permission.ManageDataversePermissions); + } else if (defPoint instanceof Dataset) { + requiredPermissions.add(Permission.ManageDatasetPermissions); + } else { + requiredPermissions.add(Permission.ManageFilePermissions); + } + + requiredPermissions.addAll(role.permissions()); + + return Collections.singletonMap("", requiredPermissions); } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CheckRateLimitForCollectionPageCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CheckRateLimitForCollectionPageCommand.java new file mode 100644 index 00000000000..b23e6034c9a --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CheckRateLimitForCollectionPageCommand.java @@ -0,0 +1,16 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + +public class CheckRateLimitForCollectionPageCommand extends AbstractVoidCommand { + public CheckRateLimitForCollectionPageCommand(DataverseRequest aRequest, DvObject dvObject) { + super(aRequest, dvObject); + } + + @Override + protected void executeImpl(CommandContext ctxt) throws CommandException { } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CheckRateLimitForDatasetPageCommand.java
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CheckRateLimitForDatasetPageCommand.java new file mode 100644 index 00000000000..da8c1e4d8e3 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CheckRateLimitForDatasetPageCommand.java @@ -0,0 +1,17 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + +public class CheckRateLimitForDatasetPageCommand extends AbstractVoidCommand { + + public CheckRateLimitForDatasetPageCommand(DataverseRequest aRequest, DvObject dvObject) { + super(aRequest, dvObject); + } + + @Override + protected void executeImpl(CommandContext ctxt) throws CommandException { } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java index bcaece55fed..6539ac27ea2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDatasetVersionCommand.java @@ -59,7 +59,8 @@ public DatasetVersion execute(CommandContext ctxt) throws CommandException { //Will throw an IllegalCommandException if a system metadatablock is changed and the appropriate key is not supplied. checkSystemMetadataKeyIfNeeded(newVersion, latest); - + registerExternalVocabValuesIfAny(ctxt, newVersion); + List<FileMetadata> newVersionMetadatum = new ArrayList<>(latest.getFileMetadatas().size()); for ( FileMetadata fmd : latest.getFileMetadatas() ) { FileMetadata fmdCopy = fmd.createCopy(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index 0470f59b861..3a21345448b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -13,8 +13,8 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; -import edu.harvard.iq.dataverse.DataFileServiceBean.UserStorageQuota; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; @@ -74,7 +74,7 @@ public class CreateNewDataFilesCommand extends AbstractCommand<CreateDataFileResult> fileSizeLimit) { try { tempFile.toFile().delete(); @@ -213,11 +216,11 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException } DataFile datafile = null; - long fileSize = 0L; + long uncompressedFileSize = -1; try { uncompressedIn = new GZIPInputStream(new FileInputStream(tempFile.toFile())); File unZippedTempFile = saveInputStreamInTempFile(uncompressedIn, fileSizeLimit, storageQuotaLimit); - fileSize = unZippedTempFile.length(); + uncompressedFileSize = unZippedTempFile.length(); datafile =
FileUtil.createSingleDataFile(version, unZippedTempFile, finalFileName, MIME_TYPE_UNDETERMINED_DEFAULT, ctxt.systemConfig().getFileFixityChecksumAlgorithm()); } catch (IOException | FileExceedsMaxSizeException | FileExceedsStorageQuotaException ioex) { // it looks like we simply skip the file silently, if its uncompressed size @@ -248,7 +251,7 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException } datafiles.add(datafile); // Update quota if present if (quota != null) { - quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + fileSize); + quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + uncompressedFileSize); } return CreateDataFileResult.success(fileName, finalType, datafiles); } @@ -628,7 +631,35 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException throw new CommandExecutionException("Failed to process uploaded BagIt file", ioex, this); } } + + // These are the final File and its size that will be used to + // create a single DataFile: + + newFile = tempFile.toFile(); + fileSize = newFile.length(); + } else { + // Direct upload. + + // Since this is a direct upload, and therefore there is no temp file + // associated with it, we may, OR MAY NOT know the size of the file. If this is + // a direct upload via the UI, the page must have already looked up + // the size, after the client confirmed that the upload had completed. + // (so that we can reject the upload here, i.e. before the user clicks + // save, if it's over the size limit or storage quota). However, if + // this is a direct upload via the API, we will wait until the + // upload is finalized in the saveAndAddFiles method to enforce the + // limits. + if (newFileSize != null) { + fileSize = newFileSize; + + // if the size is specified, and it's above the individual size + // limit for this store, we can reject it now: + if (fileSizeLimit != null && fileSize > fileSizeLimit) { + throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(fileSize), bytesToHumanReadable(fileSizeLimit)), this); + } + } + // Default to suppliedContentType if set or the overall undetermined default if a content type isn't supplied finalType = StringUtils.isBlank(suppliedContentType) ? FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT : suppliedContentType; String type = determineFileTypeByNameAndExtension(fileName); @@ -639,34 +670,19 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException } logger.fine("Supplied type: " + suppliedContentType + ", finalType: " + finalType); } + + } + // Finally, if none of the special cases above were applicable (or // if we were unable to unpack an uploaded file, etc.), we'll just // create and return a single DataFile: - File newFile = null; - long fileSize = -1; - if (tempFile != null) { - newFile = tempFile.toFile(); - fileSize = newFile.length(); - } else { - // If this is a direct upload, and therefore no temp file associated - // with it, the file size must be explicitly passed to the command - // (note that direct upload relies on knowing the size of the file - // that's being uploaded in advance). - if (newFileSize != null) { - fileSize = newFileSize; - } else { - // This is a direct upload via the API (DVUploader, etc.)
- //throw new CommandExecutionException("File size must be explicitly specified when creating DataFiles with Direct Upload", this); - } - } // We have already checked that this file does not exceed the individual size limit; // but if we are processing it as is, as a single file, we need to check if // its size does not go beyond the allocated storage quota (if specified): - if (storageQuotaLimit != null && fileSize > storageQuotaLimit) { if (newFile != null) { // Remove the temp. file, if this is a non-direct upload. @@ -685,7 +701,7 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException DataFile datafile = FileUtil.createSingleDataFile(version, newFile, newStorageIdentifier, fileName, finalType, newCheckSumType, newCheckSum); - if (datafile != null && ((newFile != null) || (newStorageIdentifier != null))) { + if (datafile != null) { if (warningMessage != null) { createIngestFailureReport(datafile, warningMessage); @@ -696,10 +712,19 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException } datafiles.add(datafile); - // Update quota (may not be necessary in the context of direct upload - ?) + // Update the quota definition for the *current upload session* + // This is relevant for the uploads going through the UI page + // (where there may be an appreciable amount of time between the user + // uploading the files and clicking "save"). The file size should be + // available here for both direct and local uploads via the UI. + // It is not yet available if this is direct-via-API - but + // for API uploads the quota check will be enforced during the final + // save. if (fileSize > 0 && quota != null) { + logger.info("Setting total usage in bytes to " + (quota.getTotalUsageInBytes() + fileSize)); quota.setTotalUsageInBytes(quota.getTotalUsageInBytes() + fileSize); } + return CreateDataFileResult.success(fileName, finalType, datafiles); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java index c9ebe735e31..c22a2cdb4a2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDatasetCommand.java @@ -3,6 +3,7 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.RoleAssignment; import edu.harvard.iq.dataverse.Template; import edu.harvard.iq.dataverse.UserNotification; @@ -12,12 +13,13 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import static edu.harvard.iq.dataverse.util.StringUtil.nonEmpty; import java.util.logging.Logger; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import java.util.List; import java.sql.Timestamp; @@ -71,13 +73,18 @@ public CreateNewDatasetCommand(Dataset theDataset, DataverseRequest aRequest, Te */ @Override protected void additionalParameterTests(CommandContext ctxt) throws
CommandException { - if ( nonEmpty(getDataset().getIdentifier()) ) { - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(getDataset().getProtocol(), ctxt); - if ( !idServiceBean.isGlobalIdUnique(getDataset().getGlobalId()) ) { - throw new IllegalCommandException(String.format("Dataset with identifier '%s', protocol '%s' and authority '%s' already exists", - getDataset().getIdentifier(), getDataset().getProtocol(), getDataset().getAuthority()), - this); - } + if (nonEmpty(getDataset().getIdentifier())) { + GlobalId pid = getDataset().getGlobalId(); + if (pid != null) { + PidProvider pidProvider = PidUtil.getPidProvider(pid.getProviderId()); + + if (!pidProvider.isGlobalIdUnique(pid)) { + throw new IllegalCommandException(String.format( + "Dataset with identifier '%s', protocol '%s' and authority '%s' already exists", + getDataset().getIdentifier(), getDataset().getProtocol(), getDataset().getAuthority()), + this); + } + } } } @@ -88,11 +95,11 @@ protected DatasetVersion getVersionToPersist( Dataset theDataset ) { @Override protected void handlePid(Dataset theDataset, CommandContext ctxt) throws CommandException { - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); - if(!idServiceBean.isConfigured()) { - throw new IllegalCommandException("PID Provider " + idServiceBean.getProviderInformation().get(0) + " is not configured.", this); + PidProvider pidProvider = PidUtil.getPidProvider(theDataset.getGlobalId().getProviderId()); + if(!pidProvider.canManagePID()) { + throw new IllegalCommandException("PID Provider " + pidProvider.getId() + " is not configured.", this); } - if ( !idServiceBean.registerWhenPublished() ) { + if ( !pidProvider.registerWhenPublished() ) { // pre-register a persistent id registerExternalIdentifier(theDataset, ctxt, true); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java new file mode 100644 index 00000000000..c0f863686da --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteCollectionQuotaCommand.java @@ -0,0 +1,53 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.storageuse.StorageQuota; +import edu.harvard.iq.dataverse.util.BundleUtil; +import java.util.logging.Logger; + +/** + * + * @author landreev + * + * A superuser-only command: + */ +@RequiredPermissions({}) +public class DeleteCollectionQuotaCommand extends AbstractVoidCommand { + + private static final Logger logger = Logger.getLogger(DeleteCollectionQuotaCommand.class.getCanonicalName()); + + private final Dataverse targetDataverse; + + public DeleteCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target) { + super(aRequest, target); + targetDataverse = target; + } + + @Override + public void executeImpl(CommandContext ctxt) 
throws CommandException { + // first check if user is a superuser + if ( (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser() ) ) { + throw new PermissionException(BundleUtil.getStringFromBundle("dataverse.storage.quota.superusersonly"), + this, null, targetDataverse); + } + + if (targetDataverse == null) { + throw new IllegalCommandException("", this); + } + + StorageQuota storageQuota = targetDataverse.getStorageQuota(); + + if (storageQuota != null && storageQuota.getAllocation() != null) { + ctxt.dataverses().disableStorageQuota(storageQuota); + } + // ... and if no quota was enabled on the collection - nothing to do = success + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java index 83d0f877d61..0812c52a846 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteDataFileCommand.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -11,6 +12,8 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.StringUtil; import java.io.IOException; @@ -23,7 +26,6 @@ import java.util.Collections; import java.util.logging.Level; import java.util.logging.Logger; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; /** * Deletes a data file, both DB entity and filesystem object. @@ -202,15 +204,18 @@ public FileVisitResult postVisitDirectory(final Path dir, final IOException e) */ } } - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); - try { - if (idServiceBean.alreadyRegistered(doomed)) { - idServiceBean.deleteIdentifier(doomed); + GlobalId pid = doomed.getGlobalId(); + if (pid != null) { + PidProvider pidProvider = PidUtil.getPidProvider(pid.getProviderId()); + + try { + if (pidProvider.alreadyRegistered(doomed)) { + pidProvider.deleteIdentifier(doomed); + } + } catch (Exception e) { + logger.log(Level.WARNING, "Identifier deletion was not successful:", e.getMessage()); + } - } catch (Exception e) { - logger.log(Level.WARNING, "Identifier deletion was not successfull:", e.getMessage()); - } - DataFile doomedAndMerged = ctxt.em().merge(doomed); ctxt.em().remove(doomedAndMerged); /** @@ -235,6 +240,20 @@ public String describe() { @Override public boolean onSuccess(CommandContext ctxt, Object r) { + // Adjust the storage use for the parent containers: + if (!doomed.isHarvested()) { + long storedSize = doomed.getFilesize(); + // ingested tabular data files also have saved originals that + // are counted as "storage use" + Long savedOriginalSize = doomed.getOriginalFileSize(); + if (savedOriginalSize != null) { + // Note that DataFile.getFilesize() can return -1 (for "unknown"): + storedSize = storedSize > 0 ?
storedSize + savedOriginalSize : savedOriginalSize; + } + if (storedSize > 0) { + ctxt.storageUse().incrementStorageSizeRecursively(doomed.getOwner().getId(), (0L - storedSize)); + } + } /** * We *could* re-index the entire dataset but it's more efficient to * target individual files for deletion, which should always be drafts. diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeletePidCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeletePidCommand.java index 274aeb3c3fd..c4910dd10c2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeletePidCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeletePidCommand.java @@ -1,7 +1,6 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; @@ -11,7 +10,8 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.util.BundleUtil; import org.apache.commons.httpclient.HttpException; @@ -38,25 +38,26 @@ public DeletePidCommand(DataverseRequest request, Dataset dataset) { protected void executeImpl(CommandContext ctxt) throws CommandException { if (!(getUser() instanceof AuthenticatedUser) || !getUser().isSuperuser()) { - throw new PermissionException(BundleUtil.getStringFromBundle("admin.api.auth.mustBeSuperUser"), - this, Collections.singleton(Permission.EditDataset), dataset); + throw new PermissionException(BundleUtil.getStringFromBundle("admin.api.auth.mustBeSuperUser"), this, + Collections.singleton(Permission.EditDataset), dataset); } - String nonNullDefaultIfKeyNotFound = ""; - String protocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(protocol, ctxt); + PidProvider pidProvider = PidUtil.getPidProvider(dataset.getGlobalId().getProviderId()); + try { - idServiceBean.deleteIdentifier(dataset); + pidProvider.deleteIdentifier(dataset); // Success! Clear the create time, etc. 
dataset.setGlobalIdCreateTime(null); dataset.setIdentifierRegistered(false); ctxt.datasets().merge(dataset); } catch (HttpException hex) { - String message = BundleUtil.getStringFromBundle("pids.deletePid.failureExpected", Arrays.asList(dataset.getGlobalId().asString(), Integer.toString(hex.getReasonCode()))); + String message = BundleUtil.getStringFromBundle("pids.deletePid.failureExpected", + Arrays.asList(dataset.getGlobalId().asString(), Integer.toString(hex.getReasonCode()))); logger.info(message); throw new IllegalCommandException(message, this); } catch (Exception ex) { - String message = BundleUtil.getStringFromBundle("pids.deletePid.failureOther", Arrays.asList(dataset.getGlobalId().asString(), ex.getLocalizedMessage())); + String message = BundleUtil.getStringFromBundle("pids.deletePid.failureOther", + Arrays.asList(dataset.getGlobalId().asString(), ex.getLocalizedMessage())); logger.info(message); throw new IllegalCommandException(message, this); } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java index 41093444360..be3e28029e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java @@ -2,7 +2,9 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.RoleAssignment; @@ -15,6 +17,8 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.search.IndexResponse; import java.util.ArrayList; import java.util.Collections; @@ -22,7 +26,7 @@ import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; + import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import java.io.IOException; import java.util.concurrent.Future; @@ -61,17 +65,17 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { throw new PermissionException("Destroy can only be called by superusers.", this, Collections.singleton(Permission.DeleteDatasetDraft), doomed); } + Dataset managedDoomed = ctxt.em().merge(doomed); // If there is a dedicated thumbnail DataFile, it needs to be reset // explicitly, or we'll get a constraint violation when deleting: - doomed.setThumbnailFile(null); - final Dataset managedDoomed = ctxt.em().merge(doomed); - + managedDoomed.setThumbnailFile(null); + // files need to iterate through and remove 'by hand' to avoid // optimistic lock issues... (plus the physical files need to be // deleted too!) - - Iterator<DataFile> dfIt = doomed.getFiles().iterator(); + DatasetVersion dv = managedDoomed.getLatestVersion(); + Iterator<DataFile> dfIt = managedDoomed.getFiles().iterator(); while (dfIt.hasNext()){ DataFile df = dfIt.next(); // Gather potential Solr IDs of files. As of this writing deaccessioned files are never indexed.
@@ -82,50 +86,52 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { ctxt.engine().submit(new DeleteDataFileCommand(df, getRequest(), true)); dfIt.remove(); } - - //also, lets delete the uploaded thumbnails! - if (!doomed.isHarvested()) { - deleteDatasetLogo(doomed); - } + dv.setFileMetadatas(null); // ASSIGNMENTS - for (RoleAssignment ra : ctxt.roles().directRoleAssignments(doomed)) { + for (RoleAssignment ra : ctxt.roles().directRoleAssignments(managedDoomed)) { ctxt.em().remove(ra); } // ROLES - for (DataverseRole ra : ctxt.roles().findByOwnerId(doomed.getId())) { + for (DataverseRole ra : ctxt.roles().findByOwnerId(managedDoomed.getId())) { ctxt.em().remove(ra); } - if (!doomed.isHarvested()) { - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt); - try { - if (idServiceBean.alreadyRegistered(doomed)) { - idServiceBean.deleteIdentifier(doomed); - for (DataFile df : doomed.getFiles()) { - idServiceBean.deleteIdentifier(df); + if (!managedDoomed.isHarvested()) { + //also, let's delete the uploaded thumbnails! + deleteDatasetLogo(managedDoomed); + // and remove the PID (perhaps should be after the remove in case that causes a roll-back?) + GlobalId pid = managedDoomed.getGlobalId(); + if (pid != null) { + PidProvider pidProvider = PidUtil.getPidProvider(pid.getProviderId()); + try { + if (pidProvider.alreadyRegistered(managedDoomed)) { + pidProvider.deleteIdentifier(managedDoomed); + //Files are handled in DeleteDataFileCommand } + } catch (Exception e) { + logger.log(Level.WARNING, "Identifier deletion was not successful:", e.getMessage()); } - } catch (Exception e) { - logger.log(Level.WARNING, "Identifier deletion was not successful:", e.getMessage()); } - } + } toReIndex = managedDoomed.getOwner(); - // dataset - ctxt.em().remove(managedDoomed); - // add potential Solr IDs of datasets to list for deletion - String solrIdOfPublishedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId(); + String solrIdOfPublishedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + managedDoomed.getId(); datasetAndFileSolrIdsToDelete.add(solrIdOfPublishedDatasetVersion); - String solrIdOfDraftDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId() + IndexServiceBean.draftSuffix; + String solrIdOfDraftDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + managedDoomed.getId() + IndexServiceBean.draftSuffix; datasetAndFileSolrIdsToDelete.add(solrIdOfDraftDatasetVersion); String solrIdOfDraftDatasetVersionPermission = solrIdOfDraftDatasetVersion + IndexServiceBean.discoverabilityPermissionSuffix; datasetAndFileSolrIdsToDelete.add(solrIdOfDraftDatasetVersionPermission); - String solrIdOfDeaccessionedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + doomed.getId() + IndexServiceBean.deaccessionedSuffix; + String solrIdOfDeaccessionedDatasetVersion = IndexServiceBean.solrDocIdentifierDataset + managedDoomed.getId() + IndexServiceBean.deaccessionedSuffix; datasetAndFileSolrIdsToDelete.add(solrIdOfDeaccessionedDatasetVersion); + + // dataset + ctxt.em().remove(managedDoomed); + + } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 3da087addd9..287e877f6e0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -20,6 +20,8 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.export.ExportService; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -30,17 +32,15 @@ import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; + import edu.harvard.iq.dataverse.batch.util.LoggingUtil; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.util.FileUtil; import java.util.ArrayList; import java.util.concurrent.Future; import org.apache.solr.client.solrj.SolrServerException; -import jakarta.ejb.EJB; -import jakarta.inject.Inject; - /** * @@ -267,7 +267,6 @@ public boolean onSuccess(CommandContext ctxt, Object r) { } catch (Exception e) { logger.warning("Failure to send dataset published messages for : " + dataset.getId() + " : " + e.getMessage()); } - ctxt.index().asyncIndexDataset(dataset, true); //re-indexing dataverses that have additional subjects if (!dataversesToIndex.isEmpty()){ @@ -297,7 +296,8 @@ public boolean onSuccess(CommandContext ctxt, Object r) { logger.log(Level.WARNING, "Finalization: exception caught while exporting: "+ex.getMessage(), ex); // ... but it is important to only update the export time stamp if the // export was indeed successful. - } + } + ctxt.index().asyncIndexDataset(dataset, true); return retVal; } @@ -350,7 +350,8 @@ private void validateDataFiles(Dataset dataset, CommandContext ctxt) throws Comm // (the decision was made to validate all the files on every // major release; we can revisit the decision if there's any // indication that this makes publishing take significantly longer. 
- if (maxFileSize == -1 || dataFile.getFilesize() < maxFileSize) { + String driverId = FileUtil.getStorageDriver(dataFile); + if(StorageIO.isDataverseAccessible(driverId) && (maxFileSize == -1 || dataFile.getFilesize() < maxFileSize)) { FileUtil.validateDataFileChecksum(dataFile); } else { @@ -385,56 +386,52 @@ private void validateDataFiles(Dataset dataset, CommandContext ctxt) throws Comm } private void publicizeExternalIdentifier(Dataset dataset, CommandContext ctxt) throws CommandException { - String protocol = getDataset().getProtocol(); - String authority = getDataset().getAuthority(); - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(protocol, ctxt); - - if (idServiceBean != null) { - - try { - String currentGlobalIdProtocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, ""); - String currentGlobalAuthority = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, ""); - String dataFilePIDFormat = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); - boolean isFilePIDsEnabled = ctxt.systemConfig().isFilePIDsEnabledForCollection(getDataset().getOwner()); - // We will skip trying to register the global identifiers for datafiles - // if "dependent" file-level identifiers are requested, AND the naming - // protocol, or the authority of the dataset global id is different from - // what's currently configured for the Dataverse. In other words - // we can't get "dependent" DOIs assigned to files in a dataset - // with the registered id that is a handle; or even a DOI, but in - // an authority that's different from what's currently configured. - // Additionaly in 4.9.3 we have added a system variable to disable - // registering file PIDs on the installation level. - if (((currentGlobalIdProtocol.equals(protocol) && currentGlobalAuthority.equals(authority)) - || dataFilePIDFormat.equals("INDEPENDENT")) - && isFilePIDsEnabled - && dataset.getLatestVersion().getMinorVersionNumber() != null - && dataset.getLatestVersion().getMinorVersionNumber().equals((long) 0)) { - //A false return value indicates a failure in calling the service - for (DataFile df : dataset.getFiles()) { - logger.log(Level.FINE, "registering global id for file {0}", df.getId()); - //A false return value indicates a failure in calling the service - if (!idServiceBean.publicizeIdentifier(df)) { - throw new Exception(); - } - df.setGlobalIdCreateTime(getTimestamp()); - df.setIdentifierRegistered(true); + PidProvider pidProvider = ctxt.dvObjects().getEffectivePidGenerator(dataset); + try { + // We will skip trying to register the global identifiers for datafiles + // if "dependent" file-level identifiers are requested, AND the naming + // protocol, or the authority of the dataset global id is different from + // what's currently configured for the Dataverse. In other words + // we can't get "dependent" DOIs assigned to files in a dataset + // with the registered id that is a handle; or even a DOI, but in + // an authority that's different from what's currently configured. + // Additionally in 4.9.3 we have added a system variable to disable + // registering file PIDs on the installation level.
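+ // Here, pidProvider.canCreatePidsLike(dataset.getGlobalId()) takes over the + // protocol/authority comparison described above: a provider that cannot + // create PIDs matching the dataset's global id cannot mint "dependent" + // file PIDs for it.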
+ boolean registerGlobalIdsForFiles = ctxt.systemConfig().isFilePIDsEnabledForCollection( + getDataset().getOwner()) + && pidProvider.canCreatePidsLike(dataset.getGlobalId()); + + if (registerGlobalIdsForFiles + && dataset.getLatestVersion().getMinorVersionNumber() != null + && dataset.getLatestVersion().getMinorVersionNumber().equals((long) 0)) { + // A false return value indicates a failure in calling the service + for (DataFile df : dataset.getFiles()) { + logger.log(Level.FINE, "registering global id for file {0}", df.getId()); + // A false return value indicates a failure in calling the service + if (!pidProvider.publicizeIdentifier(df)) { + throw new Exception(); } + df.setGlobalIdCreateTime(getTimestamp()); + df.setIdentifierRegistered(true); } - if (!idServiceBean.publicizeIdentifier(dataset)) { - throw new Exception(); - } - dataset.setGlobalIdCreateTime(new Date()); // TODO these two methods should be in the responsibility of the idServiceBean. - dataset.setIdentifierRegistered(true); - } catch (Throwable e) { - logger.warning("Failed to register the identifier "+dataset.getGlobalId().asString()+", or to register a file in the dataset; notifying the user(s), unlocking the dataset"); - - // Send failure notification to the user: - notifyUsersDatasetPublishStatus(ctxt, dataset, UserNotification.Type.PUBLISHFAILED_PIDREG); - - ctxt.datasets().removeDatasetLocks(dataset, DatasetLock.Reason.finalizePublication); - throw new CommandException(BundleUtil.getStringFromBundle("dataset.publish.error", idServiceBean.getProviderInformation()), this); } + if (!pidProvider.publicizeIdentifier(dataset)) { + throw new Exception(); + } + dataset.setGlobalIdCreateTime(new Date()); // TODO these two methods should be the responsibility of the + // pidProvider. + dataset.setIdentifierRegistered(true); + } catch (Throwable e) { + logger.warning("Failed to register the identifier " + dataset.getGlobalId().asString() + + ", or to register a file in the dataset; notifying the user(s), unlocking the dataset"); + + // Send failure notification to the user: + notifyUsersDatasetPublishStatus(ctxt, dataset, UserNotification.Type.PUBLISHFAILED_PIDREG); + + ctxt.datasets().removeDatasetLocks(dataset, DatasetLock.Reason.finalizePublication); + throw new CommandException( + BundleUtil.getStringFromBundle("dataset.publish.error", pidProvider.getProviderInformation()), + this); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java new file mode 100644 index 00000000000..49f14e7c280 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionQuotaCommand.java @@ -0,0 +1,51 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import java.util.logging.Logger; + +/** + * + * @author landreev + * The command doesn't do much.
Its sole purpose is to check the permissions + * when it's called by the /api/dataverses/.../storage/quota API. + */ +// @RequiredPermissions - none defined, dynamic +public class GetCollectionQuotaCommand extends AbstractCommand<Long> { + + private static final Logger logger = Logger.getLogger(GetCollectionQuotaCommand.class.getCanonicalName()); + + private final Dataverse dataverse; + + public GetCollectionQuotaCommand(DataverseRequest aRequest, Dataverse target) { + super(aRequest, target); + dataverse = target; + } + + @Override + public Long execute(CommandContext ctxt) throws CommandException { + + if (dataverse != null && dataverse.getStorageQuota() != null) { + return dataverse.getStorageQuota().getAllocation(); + } + + return null; + } + + @Override + public Map<String, Set<Permission>> getRequiredPermissions() { + return Collections.singletonMap("", + dataverse.isReleased() ? Collections.emptySet() + : Collections.singleton(Permission.ViewUnpublishedDataverse)); + } +} + + diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java new file mode 100644 index 00000000000..c30a5a34a81 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetCollectionStorageUseCommand.java @@ -0,0 +1,45 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import java.util.logging.Logger; + +/** + * + * @author landreev + */ +@RequiredPermissions(Permission.ManageDataversePermissions) +// alternatively, we could make it dynamic - public for published collections +// and Permission.ViewUnpublishedDataverse required otherwise (?) +public class GetCollectionStorageUseCommand extends AbstractCommand<Long> { + + private static final Logger logger = Logger.getLogger(GetCollectionStorageUseCommand.class.getCanonicalName()); + + private final Dataverse collection; + + public GetCollectionStorageUseCommand(DataverseRequest aRequest, Dataverse target) { + super(aRequest, target); + collection = target; + } + + @Override + public Long execute(CommandContext ctxt) throws CommandException { + + if (collection == null) { + throw new CommandException("null collection passed to get storage use command", this); + } + return ctxt.storageUse().findStorageSizeByDvContainerId(collection.getId()); + } + + /*@Override + public Map<String, Set<Permission>> getRequiredPermissions() { + return Collections.singletonMap("", + dataverse.isReleased() ?
Collections.emptySet() + : Collections.singleton(Permission.ViewUnpublishedDataverse)); + }*/ +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDataFileCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDataFileCommand.java index fdf47bbd2dd..369f3cbfda6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDataFileCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDataFileCommand.java @@ -11,35 +11,34 @@ import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + import java.util.Collections; import java.util.Map; import java.util.Set; /** - * * @author Matthew */ // no annotations here, since permissions are dynamically decided // based off GetDatasetCommand for similar permissions checking public class GetDataFileCommand extends AbstractCommand<DataFile> { - private final DataFile df; + private final DataFile dataFile; - public GetDataFileCommand(DataverseRequest aRequest, DataFile anAffectedDataset) { - super(aRequest, anAffectedDataset); - df = anAffectedDataset; + public GetDataFileCommand(DataverseRequest aRequest, DataFile dataFile) { + super(aRequest, dataFile); + this.dataFile = dataFile; } @Override public DataFile execute(CommandContext ctxt) throws CommandException { - return df; + return dataFile; } @Override public Map<String, Set<Permission>> getRequiredPermissions() { return Collections.singletonMap("", - df.isReleased() ? Collections.emptySet() - : Collections.singleton(Permission.ViewUnpublishedDataset)); + dataFile.isReleased() ? Collections.emptySet() + : Collections.singleton(Permission.ViewUnpublishedDataset)); } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java new file mode 100644 index 00000000000..2d5e1251614 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java @@ -0,0 +1,38 @@ + +package edu.harvard.iq.dataverse.engine.command.impl; + + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; + + +import java.util.logging.Logger; + +/** + * + * @author stephenkraffmiller + */ +@RequiredPermissions(Permission.AddDataset) +public class GetDatasetSchemaCommand extends AbstractCommand<String> { + + private static final Logger logger = Logger.getLogger(GetDatasetSchemaCommand.class.getCanonicalName()); + + private final Dataverse dataverse; + + public GetDatasetSchemaCommand(DataverseRequest aRequest, Dataverse target) { + super(aRequest, target); + dataverse = target; + } + + @Override + public String execute(CommandContext ctxt) throws CommandException { + return ctxt.dataverses().getCollectionDatasetSchema(dataverse.getAlias()); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftFileMetadataIfAvailableCommand.java
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftFileMetadataIfAvailableCommand.java index 14999548b34..1d83f0dd1f4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftFileMetadataIfAvailableCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDraftFileMetadataIfAvailableCommand.java @@ -1,8 +1,6 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; @@ -12,25 +10,19 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; /** - * * @author Matthew */ -@RequiredPermissions( Permission.ViewUnpublishedDataset ) -public class GetDraftFileMetadataIfAvailableCommand extends AbstractCommand<FileMetadata>{ - private final DataFile df; +@RequiredPermissions(Permission.ViewUnpublishedDataset) +public class GetDraftFileMetadataIfAvailableCommand extends AbstractCommand<FileMetadata> { + private final DataFile dataFile; - public GetDraftFileMetadataIfAvailableCommand(DataverseRequest aRequest, DataFile dataFile) { - super(aRequest, dataFile); - df = dataFile; + public GetDraftFileMetadataIfAvailableCommand(DataverseRequest request, DataFile dataFile) { + super(request, dataFile); + this.dataFile = dataFile; } @Override public FileMetadata execute(CommandContext ctxt) throws CommandException { - FileMetadata fm = df.getLatestFileMetadata(); - if(fm.getDatasetVersion().getVersionState().equals(DatasetVersion.VersionState.DRAFT)) { - return df.getLatestFileMetadata(); - } - return null; + return dataFile.getDraftFileMetadata(); } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java index 1454a4b1fdd..431b3ff47c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleDatasetVersionCommand.java @@ -25,22 +25,25 @@ public class GetLatestAccessibleDatasetVersionCommand extends AbstractCommand<DatasetVersion> { private final Dataset ds; private final boolean includeDeaccessioned; + private boolean checkPermsWhenDeaccessioned; public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset) { - this(aRequest, anAffectedDataset, false); + this(aRequest, anAffectedDataset, false, false); } - public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned) { + public GetLatestAccessibleDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned, boolean checkPermsWhenDeaccessioned) { super(aRequest, anAffectedDataset); ds = anAffectedDataset; this.includeDeaccessioned = includeDeaccessioned; + this.checkPermsWhenDeaccessioned = checkPermsWhenDeaccessioned; } @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - if (ds.getLatestVersion().isDraft() && ctxt.permissions().requestOn(getRequest(), ds).has(Permission.ViewUnpublishedDataset)) { + if (ds.getLatestVersion().isDraft() && + ctxt.permissions().requestOn(getRequest(), ds).has(Permission.ViewUnpublishedDataset)) {
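+ // The caller can view unpublished versions, so serve the draft directly.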
return ctxt.engine().submit(new GetDraftDatasetVersionCommand(getRequest(), ds)); } - return ctxt.engine().submit(new GetLatestPublishedDatasetVersionCommand(getRequest(), ds, includeDeaccessioned)); + return ctxt.engine().submit(new GetLatestPublishedDatasetVersionCommand(getRequest(), ds, includeDeaccessioned, checkPermsWhenDeaccessioned)); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleFileMetadataCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleFileMetadataCommand.java new file mode 100644 index 00000000000..05f3c73bde0 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestAccessibleFileMetadataCommand.java @@ -0,0 +1,30 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + +public class GetLatestAccessibleFileMetadataCommand extends AbstractGetPublishedFileMetadataCommand { + + public GetLatestAccessibleFileMetadataCommand(DataverseRequest request, DataFile dataFile, boolean includeDeaccessioned) { + super(request, dataFile, includeDeaccessioned); + } + + @Override + public FileMetadata execute(CommandContext ctxt) throws CommandException { + FileMetadata fileMetadata = null; + + if (ctxt.permissions().requestOn(getRequest(), dataFile.getOwner()).has(Permission.ViewUnpublishedDataset)) { + fileMetadata = dataFile.getDraftFileMetadata(); + } + + if (fileMetadata == null) { + fileMetadata = getLatestPublishedFileMetadata(ctxt); + } + + return fileMetadata; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java index 4e4252fd155..0afcbe2d0bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedDatasetVersionCommand.java @@ -17,24 +17,49 @@ public class GetLatestPublishedDatasetVersionCommand extends AbstractCommand<DatasetVersion> { private final Dataset ds; private final boolean includeDeaccessioned; + private final boolean checkPermsWhenDeaccessioned; public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset) { - this(aRequest, anAffectedDataset, false); + this(aRequest, anAffectedDataset, false, false); } - public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned) { + public GetLatestPublishedDatasetVersionCommand(DataverseRequest aRequest, Dataset anAffectedDataset, boolean includeDeaccessioned, boolean checkPermsWhenDeaccessioned) { super(aRequest, anAffectedDataset); ds = anAffectedDataset; this.includeDeaccessioned = includeDeaccessioned; + this.checkPermsWhenDeaccessioned = checkPermsWhenDeaccessioned; } + /* + * Depending on the requested parameters, this command will return: + * + * If the user requested to include a deaccessioned dataset with the files, the command will return the deaccessioned version if the user has permissions to view the files. Otherwise, it will return null.
+ * If the user requested to include a deaccessioned dataset but did not request the files, the command will return the deaccessioned version. + * If the user did not request to include a deaccessioned dataset, the command will return the latest published version. + * + */ @Override public DatasetVersion execute(CommandContext ctxt) throws CommandException { - for (DatasetVersion dsv : ds.getVersions()) { - if (dsv.isReleased() || (includeDeaccessioned && dsv.isDeaccessioned() && ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset))) { - return dsv; + DatasetVersion dsVersionResult = getReleaseOrDeaccessionedDatasetVersion(); + if (dsVersionResult != null && userHasPermissionsOnDatasetVersion(dsVersionResult, checkPermsWhenDeaccessioned, ctxt, ds)) { + return dsVersionResult; + } + return null; + } + + private DatasetVersion getReleaseOrDeaccessionedDatasetVersion() { + for (DatasetVersion dsVersion : ds.getVersions()) { + if (dsVersion.isReleased() || (includeDeaccessioned && dsVersion.isDeaccessioned())) { + return dsVersion; } } return null; } + + private boolean userHasPermissionsOnDatasetVersion(DatasetVersion dsVersionResult, boolean checkPermsWhenDeaccessioned, CommandContext ctxt, Dataset ds) { + if (dsVersionResult.isDeaccessioned() && checkPermsWhenDeaccessioned) { + return ctxt.permissions().requestOn(getRequest(), ds).has(Permission.EditDataset); + } + return true; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedFileMetadataCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedFileMetadataCommand.java new file mode 100644 index 00000000000..fc13dba1a34 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetLatestPublishedFileMetadataCommand.java @@ -0,0 +1,19 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + +public class GetLatestPublishedFileMetadataCommand extends AbstractGetPublishedFileMetadataCommand { + + public GetLatestPublishedFileMetadataCommand(DataverseRequest request, DataFile dataFile, boolean includeDeaccessioned) { + super(request, dataFile, includeDeaccessioned); + } + + @Override + public FileMetadata execute(CommandContext ctxt) throws CommandException { + return getLatestPublishedFileMetadata(ctxt); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetProvJsonCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetProvJsonCommand.java index 2de2adff099..b98cd70a4da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetProvJsonCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetProvJsonCommand.java @@ -9,12 +9,12 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.util.json.JsonUtil; + import java.io.IOException; import java.io.InputStream; import java.util.logging.Logger; -import jakarta.json.Json; import jakarta.json.JsonObject; -import jakarta.json.JsonReader; @RequiredPermissions(Permission.EditDataset) public class 
GetProvJsonCommand extends AbstractCommand { @@ -35,13 +35,13 @@ public JsonObject execute(CommandContext ctxt) throws CommandException { try { StorageIO dataAccess = dataFile.getStorageIO(); - InputStream inputStream = dataAccess.getAuxFileAsInputStream(provJsonExtension); - JsonObject jsonObject = null; - if(null != inputStream) { - JsonReader jsonReader = Json.createReader(inputStream); - jsonObject = jsonReader.readObject(); + try (InputStream inputStream = dataAccess.getAuxFileAsInputStream(provJsonExtension)) { + JsonObject jsonObject = null; + if (null != inputStream) { + jsonObject = JsonUtil.getJsonObject(inputStream); + } + return jsonObject; } - return jsonObject; } catch (IOException ex) { String error = "Exception caught in DataAccess.getStorageIO(dataFile) getting file. Error: " + ex; throw new IllegalCommandException(error, this); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedDatasetVersionCommand.java index a87eb8a99a5..07256f057e2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetSpecificPublishedDatasetVersionCommand.java @@ -25,23 +25,36 @@ public class GetSpecificPublishedDatasetVersionCommand extends AbstractCommand isRequestedVersionFileMetadata(fileMetadata, ctxt)) + .findFirst() + .orElse(null); + } + + private boolean isRequestedVersionFileMetadata(FileMetadata fileMetadata, CommandContext ctxt) { + DatasetVersion datasetVersion = fileMetadata.getDatasetVersion(); + return isDatasetVersionAccessible(datasetVersion, dataFile.getOwner(), ctxt) + && datasetVersion.getVersionNumber().equals(majorVersion) + && datasetVersion.getMinorVersionNumber().equals(minorVersion); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GrantSuperuserStatusCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GrantSuperuserStatusCommand.java index 42af43b7247..ec8c8976260 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GrantSuperuserStatusCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GrantSuperuserStatusCommand.java @@ -14,7 +14,7 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; /** * diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java index 478272950bd..772c989264c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ImportDatasetCommand.java @@ -1,14 +1,14 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import 
edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException;
 import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
 import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
-import edu.harvard.iq.dataverse.pidproviders.FakePidProviderServiceBean;
+import edu.harvard.iq.dataverse.pidproviders.PidProvider;
+import edu.harvard.iq.dataverse.pidproviders.PidUtil;
 
 import java.io.IOException;
 import java.util.Collections;
@@ -80,9 +80,9 @@ protected void additionalParameterTests(CommandContext ctxt) throws CommandExcep
          * Dataverse) but aren't findable to be used. That could be the case if, for
          * example, someone was importing a draft dataset from elsewhere.
          */
-        GlobalIdServiceBean globalIdServiceBean = GlobalIdServiceBean.getBean(ds.getProtocol(), ctxt);
-        if (globalIdServiceBean != null) {
-            if (globalIdServiceBean.alreadyRegistered(ds.getGlobalId(), true)) {
+        PidProvider pidProvider = PidUtil.getPidProvider(ds.getGlobalId().getProviderId());
+        if (pidProvider != null) {
+            if (pidProvider.alreadyRegistered(ds.getGlobalId(), true)) {
                 return;
             }
         }
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListFeaturedCollectionsCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListFeaturedCollectionsCommand.java
new file mode 100644
index 00000000000..4dca522e499
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListFeaturedCollectionsCommand.java
@@ -0,0 +1,50 @@
+
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.DataverseFeaturedDataverse;
+import edu.harvard.iq.dataverse.DvObject;
+import edu.harvard.iq.dataverse.authorization.Permission;
+import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
+import edu.harvard.iq.dataverse.engine.command.CommandContext;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ *
+ * @author stephenkraffmiller
+ */
+public class ListFeaturedCollectionsCommand extends AbstractCommand<List<Dataverse>> {
+
+    private final Dataverse dv;
+
+    public ListFeaturedCollectionsCommand(DataverseRequest aRequest, Dataverse aDataverse) {
+        super(aRequest, aDataverse);
+        dv = aDataverse;
+    }
+
+    @Override
+    public List<Dataverse> execute(CommandContext ctxt) throws CommandException {
+        List<Dataverse> featuredTarget = new ArrayList<>();
+        List<DataverseFeaturedDataverse> featuredList = ctxt.featuredDataverses().findByDataverseId(dv.getId());
+        for (DataverseFeaturedDataverse dfd : featuredList) {
+            Dataverse fd = dfd.getFeaturedDataverse();
+            featuredTarget.add(fd);
+        }
+        return featuredTarget;
+
+    }
+
+    @Override
+    public Map<String, Set<Permission>> getRequiredPermissions() {
+        return Collections.singletonMap("",
+                dv.isReleased() ? Collections.emptySet()
+                : Collections.singleton(Permission.ViewUnpublishedDataverse));
+    }
+
+}
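A brief usage sketch for the new command; the caller, engine, and variable names here are illustrative assumptions, not part of this diff:

    // Illustrative only: list the featured collections of a collection.
    // commandEngine and request are assumed to be in scope, e.g. in an API bean.
    List<Dataverse> featured = commandEngine.submit(
            new ListFeaturedCollectionsCommand(request, dataverse));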
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListMetadataBlocksCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListMetadataBlocksCommand.java
index 912318cf155..8275533ced2 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListMetadataBlocksCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListMetadataBlocksCommand.java
@@ -7,6 +7,7 @@
 import edu.harvard.iq.dataverse.engine.command.CommandContext;
 import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
 import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -14,29 +15,40 @@
 /**
  * Lists the metadata blocks of a {@link Dataverse}.
- * 
+ *
  * @author michael
  */
 // no annotations here, since permissions are dynamically decided
-public class ListMetadataBlocksCommand extends AbstractCommand<List<MetadataBlock>>{
-    
-    private final Dataverse dv;
-    
-    public ListMetadataBlocksCommand(DataverseRequest aRequest, Dataverse aDataverse) {
-        super(aRequest, aDataverse);
-        dv = aDataverse;
+public class ListMetadataBlocksCommand extends AbstractCommand<List<MetadataBlock>> {
+
+    private final Dataverse dataverse;
+    private final boolean onlyDisplayedOnCreate;
+
+    public ListMetadataBlocksCommand(DataverseRequest request, Dataverse dataverse, boolean onlyDisplayedOnCreate) {
+        super(request, dataverse);
+        this.dataverse = dataverse;
+        this.onlyDisplayedOnCreate = onlyDisplayedOnCreate;
     }
 
     @Override
     public List<MetadataBlock> execute(CommandContext ctxt) throws CommandException {
-        return dv.getMetadataBlocks();
+        if (onlyDisplayedOnCreate) {
+            return listMetadataBlocksDisplayedOnCreate(ctxt, dataverse);
+        }
+        return dataverse.getMetadataBlocks();
     }
-    
+
+    private List<MetadataBlock> listMetadataBlocksDisplayedOnCreate(CommandContext ctxt, Dataverse dataverse) {
+        if (dataverse.isMetadataBlockRoot() || dataverse.getOwner() == null) {
+            return ctxt.metadataBlocks().listMetadataBlocksDisplayedOnCreate(dataverse);
+        }
+        return listMetadataBlocksDisplayedOnCreate(ctxt, dataverse.getOwner());
+    }
+
     @Override
     public Map<String, Set<Permission>> getRequiredPermissions() {
         return Collections.singletonMap("",
-                dv.isReleased() ? Collections.emptySet()
-                : Collections.singleton(Permission.ViewUnpublishedDataverse));
-    }
-    
+                dataverse.isReleased() ? Collections.emptySet()
+                        : Collections.singleton(Permission.ViewUnpublishedDataverse));
+    }
 }
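The onlyDisplayedOnCreate branch above recurses up the owner chain until it reaches a metadata-block root or the root collection (whose owner is null). An equivalent iterative form of the same traversal, shown only to make the termination conditions explicit:

    // Equivalent to listMetadataBlocksDisplayedOnCreate() above, without recursion.
    Dataverse current = dataverse;
    while (!current.isMetadataBlockRoot() && current.getOwner() != null) {
        current = current.getOwner();
    }
    return ctxt.metadataBlocks().listMetadataBlocksDisplayedOnCreate(current);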
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java
index 1858ba377ab..b619d32cc7e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ListRoleAssignments.java
@@ -6,16 +6,18 @@
 import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
 import edu.harvard.iq.dataverse.engine.command.CommandContext;
 import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
-import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
 import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Collections;
 
 /**
  *
  * @author michael
  */
-@RequiredPermissions( Permission.ManageDataversePermissions )
+// no annotations here, since permissions are dynamically decided
 public class ListRoleAssignments extends AbstractCommand<List<RoleAssignment>> {
 
     private final DvObject definitionPoint;
@@ -34,5 +36,12 @@ public List<RoleAssignment> execute(CommandContext ctxt) throws CommandException
         }
         return ctxt.permissions().assignmentsOn(definitionPoint);
     }
+
+    @Override
+    public Map<String, Set<Permission>> getRequiredPermissions() {
+        return Collections.singletonMap("",
+                definitionPoint.isInstanceofDataset() ? Collections.singleton(Permission.ManageDatasetPermissions)
+                        : Collections.singleton(Permission.ManageDataversePermissions));
+    }
 }
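With the class-level @RequiredPermissions annotation removed, the permission needed to list role assignments now depends on the definition point. Restated as a plain expression, equivalent to the getRequiredPermissions() override above:

    // ManageDatasetPermissions for datasets; ManageDataversePermissions otherwise.
    Permission required = definitionPoint.isInstanceofDataset()
            ? Permission.ManageDatasetPermissions
            : Permission.ManageDataversePermissions;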
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MergeInAccountCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MergeInAccountCommand.java
index 1ec51764d73..03f4dceef88 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MergeInAccountCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/MergeInAccountCommand.java
@@ -14,7 +14,6 @@
 import edu.harvard.iq.dataverse.UserNotification;
 import edu.harvard.iq.dataverse.authorization.AuthenticatedUserLookup;
 import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUser;
-import edu.harvard.iq.dataverse.authorization.providers.oauth2.OAuth2TokenData;
 import edu.harvard.iq.dataverse.authorization.users.ApiToken;
 import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
 import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
@@ -25,7 +24,6 @@
 import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
 import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
 import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
-import edu.harvard.iq.dataverse.passwordreset.PasswordResetData;
 import edu.harvard.iq.dataverse.search.IndexResponse;
 import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch;
 import edu.harvard.iq.dataverse.workflows.WorkflowComment;
@@ -177,6 +175,7 @@ protected void executeImpl(CommandContext ctxt) throws CommandException {
 
         ctxt.em().createNativeQuery("Delete from OAuth2TokenData where user_id ="+consumedAU.getId()).executeUpdate();
 
+        ctxt.em().createNativeQuery("DELETE FROM explicitgroup_authenticateduser consumed USING explicitgroup_authenticateduser ongoing WHERE consumed.containedauthenticatedusers_id="+ongoingAU.getId()+" AND ongoing.containedauthenticatedusers_id="+consumedAU.getId()).executeUpdate();
         ctxt.em().createNativeQuery("UPDATE explicitgroup_authenticateduser SET containedauthenticatedusers_id="+ongoingAU.getId()+" WHERE containedauthenticatedusers_id="+consumedAU.getId()).executeUpdate();
 
         ctxt.actionLog().changeUserIdentifierInHistory(consumedAU.getIdentifier(), ongoingAU.getIdentifier());
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java
index f5ef121dee2..6b95f3b6de1 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java
@@ -2,7 +2,6 @@
 
 import edu.harvard.iq.dataverse.Dataset;
 import edu.harvard.iq.dataverse.DatasetLock;
-import edu.harvard.iq.dataverse.GlobalIdServiceBean;
 import edu.harvard.iq.dataverse.authorization.Permission;
 import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
 import edu.harvard.iq.dataverse.engine.command.Command;
@@ -11,6 +10,7 @@
 import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
 import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
 import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
+import edu.harvard.iq.dataverse.pidproviders.PidProvider;
 import edu.harvard.iq.dataverse.privateurl.PrivateUrl;
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
 import edu.harvard.iq.dataverse.util.BundleUtil;
@@ -130,24 +130,15 @@ public PublishDatasetResult execute(CommandContext ctxt) throws CommandException
             // ...
             // Additionally in 4.9.3 we have added a system variable to disable
             // registering file PIDs on the installation level.
-            String currentGlobalIdProtocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, "");
-            String currentGlobalAuthority= ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, "");
-            String dataFilePIDFormat = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT");
             boolean registerGlobalIdsForFiles = 
-                (currentGlobalIdProtocol.equals(theDataset.getProtocol()) || dataFilePIDFormat.equals("INDEPENDENT"))
-                && ctxt.systemConfig().isFilePIDsEnabledForCollection(theDataset.getOwner());
-
-            if ( registerGlobalIdsForFiles ){
-                registerGlobalIdsForFiles = currentGlobalAuthority.equals( theDataset.getAuthority() );
-            }
+                ctxt.systemConfig().isFilePIDsEnabledForCollection(getDataset().getOwner()) &&
+                ctxt.dvObjects().getEffectivePidGenerator(getDataset()).canCreatePidsLike(getDataset().getGlobalId());
 
             boolean validatePhysicalFiles = ctxt.systemConfig().isDatafileValidationOnPublishEnabled();
 
             // As of v5.0, publishing a dataset is always done asynchronously, 
             // with the dataset locked for the duration of the operation. 
-            //if ((registerGlobalIdsForFiles || validatePhysicalFiles)
-            //        && theDataset.getFiles().size() > ctxt.systemConfig().getPIDAsynchRegFileCount()) {
 
             String info = "Publishing the dataset; "; 
             info += registerGlobalIdsForFiles ? 
"Registering PIDs for Datafiles; " : ""; @@ -178,15 +169,6 @@ public PublishDatasetResult execute(CommandContext ctxt) throws CommandException // method: //ctxt.datasets().callFinalizePublishCommandAsynchronously(theDataset.getId(), ctxt, request, datasetExternallyReleased); return new PublishDatasetResult(theDataset, Status.Inprogress); - - /** - * Code for for "synchronous" (while-you-wait) publishing - * is preserved below, commented out: - } else { - // Synchronous publishing (no workflow involved) - theDataset = ctxt.engine().submit(new FinalizeDatasetPublicationCommand(theDataset, getRequest(),datasetExternallyReleased)); - return new PublishDatasetResult(theDataset, Status.Completed); - } */ } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java index 779bc7fb7fe..7b80871a1e0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RegisterDvObjectCommand.java @@ -4,20 +4,17 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.DvObjectContainer; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; + import java.sql.Timestamp; import java.util.Date; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; -import edu.harvard.iq.dataverse.HandlenetServiceBean; -import edu.harvard.iq.dataverse.batch.util.LoggingUtil; -import java.io.IOException; -import org.apache.solr.client.solrj.SolrServerException; /** * @@ -44,48 +41,37 @@ public RegisterDvObjectCommand(DataverseRequest aRequest, DvObject target, Boole @Override protected void executeImpl(CommandContext ctxt) throws CommandException { + DvObjectContainer container = (target instanceof DvObjectContainer) ? (DvObjectContainer) target : target.getOwner(); + // Get the pidProvider that is configured to mint new IDs + PidProvider pidProvider = ctxt.dvObjects().getEffectivePidGenerator(container); if(this.migrateHandle){ //Only continue if you can successfully migrate the handle - if (!processMigrateHandle(ctxt)) return; + if (HandlePidProvider.HDL_PROTOCOL.equals(pidProvider.getProtocol()) || !processMigrateHandle(ctxt)) return; } - String nonNullDefaultIfKeyNotFound = ""; - String protocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - String authority = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Authority, nonNullDefaultIfKeyNotFound); - // Get the idServiceBean that is configured to mint new IDs - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(protocol, ctxt); + try { //Test to see if identifier already present //if so, leave. 
if (target.getIdentifier() == null || target.getIdentifier().isEmpty()) { - if (target.isInstanceofDataset()) { - target.setIdentifier(idServiceBean.generateDatasetIdentifier((Dataset) target)); - - } else { - target.setIdentifier(idServiceBean.generateDataFileIdentifier((DataFile) target)); - } - if (target.getProtocol() == null) { - target.setProtocol(protocol); - } - if (target.getAuthority() == null) { - target.setAuthority(authority); - } + pidProvider.generatePid(target); } - if (idServiceBean.alreadyRegistered(target)) { + + if (pidProvider.alreadyRegistered(target)) { return; } - String doiRetString = idServiceBean.createIdentifier(target); + String doiRetString = pidProvider.createIdentifier(target); if (doiRetString != null && doiRetString.contains(target.getIdentifier())) { - if (!idServiceBean.registerWhenPublished()) { + if (!pidProvider.registerWhenPublished()) { // Should register ID before publicize() is called - // For example, DOIEZIdServiceBean tries to recreate the id if the identifier isn't registered before + // For example, DOIEZIdProvider tries to recreate the id if the identifier isn't registered before // publicizeIdentifier is called target.setIdentifierRegistered(true); target.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); } if (target.isReleased()) { - idServiceBean.publicizeIdentifier(target); + pidProvider.publicizeIdentifier(target); } - if (idServiceBean.registerWhenPublished() && target.isReleased()) { + if (pidProvider.registerWhenPublished() && target.isReleased()) { target.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); target.setIdentifierRegistered(true); } @@ -95,27 +81,21 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { Dataset dataset = (Dataset) target; for (DataFile df : dataset.getFiles()) { if (df.getIdentifier() == null || df.getIdentifier().isEmpty()) { - df.setIdentifier(idServiceBean.generateDataFileIdentifier(df)); - if (df.getProtocol() == null || df.getProtocol().isEmpty()) { - df.setProtocol(protocol); - } - if (df.getAuthority() == null || df.getAuthority().isEmpty()) { - df.setAuthority(authority); - } + pidProvider.generatePid(df); } - doiRetString = idServiceBean.createIdentifier(df); + doiRetString = pidProvider.createIdentifier(df); if (doiRetString != null && doiRetString.contains(df.getIdentifier())) { - if (!idServiceBean.registerWhenPublished()) { + if (!pidProvider.registerWhenPublished()) { // Should register ID before publicize() is called - // For example, DOIEZIdServiceBean tries to recreate the id if the identifier isn't registered before + // For example, DOIEZIdProvider tries to recreate the id if the identifier isn't registered before // publicizeIdentifier is called df.setIdentifierRegistered(true); df.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); } if (df.isReleased()) { - idServiceBean.publicizeIdentifier(df); + pidProvider.publicizeIdentifier(df); } - if (idServiceBean.registerWhenPublished() && df.isReleased()) { + if (pidProvider.registerWhenPublished() && df.isReleased()) { df.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); df.setIdentifierRegistered(true); } @@ -145,7 +125,7 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { private Boolean processMigrateHandle (CommandContext ctxt){ boolean retval = true; if(!target.isInstanceofDataset()) return false; - if(!target.getProtocol().equals(HandlenetServiceBean.HDL_PROTOCOL)) return false; + if(!target.getProtocol().equals(HandlePidProvider.HDL_PROTOCOL)) 
return false; AlternativePersistentIdentifier api = new AlternativePersistentIdentifier(); api.setProtocol(target.getProtocol()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java index a29e7fdd59c..6b7baa7d01b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/RequestRsyncScriptCommand.java @@ -27,6 +27,7 @@ * "actiontype" in the actionlogrecord rather than "InternalError" if you throw * a CommandExecutionException. */ +@Deprecated(forRemoval = true, since = "2024-07-07") @RequiredPermissions(Permission.EditDataset) public class RequestRsyncScriptCommand extends AbstractCommand { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java index 6b2872f3397..b7e3ddd8ce6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReservePidCommand.java @@ -1,7 +1,6 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; @@ -11,6 +10,8 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.Arrays; @@ -41,11 +42,10 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { this, Collections.singleton(Permission.EditDataset), dataset); } - String nonNullDefaultIfKeyNotFound = ""; - String protocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, nonNullDefaultIfKeyNotFound); - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(protocol, ctxt); + PidProvider pidProvider = ctxt.dvObjects().getEffectivePidGenerator(dataset); + try { - String returnString = idServiceBean.createIdentifier(dataset); + String returnString = pidProvider.createIdentifier(dataset); logger.fine(returnString); // No errors caught, so mark PID as reserved. 
dataset.setGlobalIdCreateTime(new Date()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java index caf37ad4de1..8d8fddeda6b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommand.java @@ -11,6 +11,7 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.workflows.WorkflowComment; import java.io.IOException; @@ -25,6 +26,11 @@ public class ReturnDatasetToAuthorCommand extends AbstractDatasetCommand 0) { + ctxt.storageUse().incrementStorageSizeRecursively(uningest.getOwner().getId(), (0L - archivalFileSize)); + } + } + @Override + public boolean onSuccess(CommandContext ctxt, Object r) { + + return true; + } private void resetIngestStats(DataFile uningest, CommandContext ctxt){ diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java index 1f5989c9e08..5a0ae7cbf5d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java @@ -10,10 +10,12 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; + import java.sql.Timestamp; import java.util.Collections; import java.util.Date; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; /** * @@ -36,15 +38,15 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { throw new PermissionException("Update Target URL can only be called by superusers.", this, Collections.singleton(Permission.EditDataset), target); } - GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(target.getProtocol(), ctxt); + PidProvider pidProvider = PidUtil.getPidProvider(target.getGlobalId().getProviderId()); try { - String doiRetString = idServiceBean.modifyIdentifierTargetURL(target); + String doiRetString = pidProvider.modifyIdentifierTargetURL(target); if (doiRetString != null && doiRetString.contains(target.getIdentifier())) { target.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); ctxt.em().merge(target); ctxt.em().flush(); for (DataFile df : target.getFiles()) { - doiRetString = idServiceBean.modifyIdentifierTargetURL(df); + doiRetString = pidProvider.modifyIdentifierTargetURL(df); if (doiRetString != null && doiRetString.contains(df.getIdentifier())) { df.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); ctxt.em().merge(df); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetThumbnailCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetThumbnailCommand.java index 3f4b3c36b70..b8c70ec6c46 100644 
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetThumbnailCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetThumbnailCommand.java @@ -11,12 +11,14 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.commons.io.IOUtils; @@ -59,17 +61,18 @@ public DatasetThumbnail execute(CommandContext ctxt) throws CommandException { // throw new CommandException("Just testing what an error would look like in the GUI.", this); // } if (userIntent == null) { - throw new IllegalCommandException("No changes to save.", this); + throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasets.api.thumbnail.noChange"), this); } switch (userIntent) { case setDatasetFileAsThumbnail: if (dataFileIdSupplied == null) { - throw new CommandException("A file was not selected to be the new dataset thumbnail.", this); + throw new CommandException(BundleUtil.getStringFromBundle("datasets.api.thumbnail.fileNotSupplied"), this); } DataFile datasetFileThumbnailToSwitchTo = ctxt.files().find(dataFileIdSupplied); if (datasetFileThumbnailToSwitchTo == null) { - throw new CommandException("Could not find file based on id supplied: " + dataFileIdSupplied + ".", this); + throw new CommandException(BundleUtil.getStringFromBundle("datasets.api.thumbnail.fileNotFound", + List.of(dataFileIdSupplied.toString())), this); } Dataset ds1 = ctxt.datasets().setDatasetFileAsThumbnail(dataset, datasetFileThumbnailToSwitchTo); DatasetThumbnail datasetThumbnail = ds1.getDatasetThumbnail(ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); @@ -79,11 +82,12 @@ public DatasetThumbnail execute(CommandContext ctxt) throws CommandException { if (dataFile.getId().equals(dataFileIdSupplied)) { return datasetThumbnail; } else { - throw new CommandException("Dataset thumbnail is should be based on file id " + dataFile.getId() + " but instead it is " + dataFileIdSupplied + ".", this); + throw new CommandException(BundleUtil.getStringFromBundle("datasets.api.thumbnail.basedOnWrongFileId", + List.of(String.valueOf(dataFile.getId()),String.valueOf(dataFileIdSupplied))), this); } } } else { - throw new CommandException("Dataset thumbnail is unexpectedly absent.", this); + throw new CommandException(BundleUtil.getStringFromBundle("datasets.api.thumbnail.missing"), this); } case setNonDatasetFileAsThumbnail: @@ -91,14 +95,14 @@ public DatasetThumbnail execute(CommandContext ctxt) throws CommandException { try { uploadedFile = FileUtil.inputStreamToFile(inputStream); } catch (IOException ex) { - throw new CommandException("In setNonDatasetFileAsThumbnail caught exception calling inputStreamToFile: " + ex, this); + throw new CommandException(BundleUtil.getStringFromBundle("datasets.api.thumbnail.inputStreamToFile.exception", List.of(ex.getMessage())), this); } if (uploadedFile == null) { - throw new CommandException("In setNonDatasetFileAsThumbnail uploadedFile was null.", this); + throw new 
CommandException(BundleUtil.getStringFromBundle("datasets.api.thumbnail.nonDatasetsFileIsNull"), this); } long uploadLogoSizeLimit = ctxt.systemConfig().getUploadLogoSizeLimit(); if (uploadedFile.length() > uploadLogoSizeLimit) { - throw new IllegalCommandException("File is larger than maximum size: " + uploadLogoSizeLimit + ".", this); + throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasets.api.thumbnail.fileToLarge", List.of(String.valueOf(uploadLogoSizeLimit))), this); } FileInputStream fileAsStream = null; try { @@ -107,23 +111,25 @@ public DatasetThumbnail execute(CommandContext ctxt) throws CommandException { Logger.getLogger(UpdateDatasetThumbnailCommand.class.getName()).log(Level.SEVERE, null, ex); } Dataset datasetWithNewThumbnail = ctxt.datasets().setNonDatasetFileAsThumbnail(dataset, fileAsStream); - IOUtils.closeQuietly(fileAsStream); + IOUtils.closeQuietly(fileAsStream); if (datasetWithNewThumbnail != null) { - return datasetWithNewThumbnail.getDatasetThumbnail(ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); - } else { - return null; + DatasetThumbnail thumbnail = datasetWithNewThumbnail.getDatasetThumbnail(ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); + if (thumbnail != null) { + return thumbnail; + } } + throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasets.api.thumbnail.nonDatasetFailed"), this); case removeThumbnail: - Dataset ds2 = ctxt.datasets().removeDatasetThumbnail(dataset); + Dataset ds2 = ctxt.datasets().clearDatasetLevelThumbnail(dataset); DatasetThumbnail datasetThumbnail2 = ds2.getDatasetThumbnail(ImageThumbConverter.DEFAULT_CARDIMAGE_SIZE); if (datasetThumbnail2 == null) { return null; } else { - throw new CommandException("User wanted to remove the thumbnail it still has one!", this); + throw new CommandException(BundleUtil.getStringFromBundle("datasets.api.thumbnail.notDeleted"), this); } default: - throw new IllegalCommandException("Whatever you are trying to do to the dataset thumbnail is not supported.", this); + throw new IllegalCommandException(BundleUtil.getStringFromBundle("datasets.api.thumbnail.actionNotSupported"), this); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 7591bebe796..994f4c7dfb6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -1,6 +1,12 @@ package edu.harvard.iq.dataverse.engine.command.impl; -import edu.harvard.iq.dataverse.*; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.DataFileCategory; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetLock; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DatasetVersionDifference; +import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.CommandContext; @@ -8,7 +14,6 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import 
edu.harvard.iq.dataverse.util.DatasetFieldUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -115,8 +120,11 @@ public Dataset execute(CommandContext ctxt) throws CommandException { //Will throw an IllegalCommandException if a system metadatablock is changed and the appropriate key is not supplied. checkSystemMetadataKeyIfNeeded(getDataset().getOrCreateEditVersion(fmVarMet), persistedVersion); - - + + getDataset().getOrCreateEditVersion().setLastUpdateTime(getTimestamp()); + + registerExternalVocabValuesIfAny(ctxt, getDataset().getOrCreateEditVersion(fmVarMet)); + try { // Invariant: Dataset has no locks preventing the update String lockInfoMessage = "saving current edits"; @@ -256,7 +264,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { ctxt.ingest().recalculateDatasetVersionUNF(theDataset.getOrCreateEditVersion()); } - theDataset.getOrCreateEditVersion().setLastUpdateTime(getTimestamp()); theDataset.setModificationTime(getTimestamp()); savedDataset = ctxt.em().merge(theDataset); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java index 56c76f04c05..bdb69dc918f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java @@ -18,7 +18,6 @@ import java.util.ArrayList; import java.util.List; import java.util.logging.Logger; -import jakarta.persistence.TypedQuery; /** * Update an existing dataverse. @@ -30,8 +29,10 @@ public class UpdateDataverseCommand extends AbstractCommand { private final Dataverse editedDv; private final List facetList; - private final List featuredDataverseList; - private final List inputLevelList; + private final List featuredDataverseList; + private final List inputLevelList; + + private boolean datasetsReindexRequired = false; public UpdateDataverseCommand(Dataverse editedDv, List facetList, List featuredDataverseList, DataverseRequest aRequest, List inputLevelList ) { @@ -74,9 +75,13 @@ public Dataverse execute(CommandContext ctxt) throws CommandException { } } - DataverseType oldDvType = ctxt.dataverses().find(editedDv.getId()).getDataverseType(); - String oldDvAlias = ctxt.dataverses().find(editedDv.getId()).getAlias(); - String oldDvName = ctxt.dataverses().find(editedDv.getId()).getName(); + Dataverse oldDv = ctxt.dataverses().find(editedDv.getId()); + + DataverseType oldDvType = oldDv.getDataverseType(); + String oldDvAlias = oldDv.getAlias(); + String oldDvName = oldDv.getName(); + oldDv = null; + Dataverse result = ctxt.dataverses().save(editedDv); if ( facetList != null ) { @@ -101,6 +106,14 @@ public Dataverse execute(CommandContext ctxt) throws CommandException { } } + // We don't want to reindex the children datasets unnecessarily: + // When these values are changed we need to reindex all children datasets + // This check is not recursive as all the values just report the immediate parent + if (!oldDvType.equals(editedDv.getDataverseType()) + || !oldDvName.equals(editedDv.getName()) + || !oldDvAlias.equals(editedDv.getAlias())) { + datasetsReindexRequired = true; + } return result; } @@ -110,9 +123,16 @@ public boolean onSuccess(CommandContext ctxt, Object r) { // first kick of async index of datasets // TODO: is this actually needed? 
Is there a better way to handle
+        // It appears that we at some point lost some extra logic here, where
+        // we only reindex the underlying datasets if one or more of the specific set
+        // of fields have been changed (since these values are included in the
+        // indexed solr documents for datasets). So I'm putting that back. -L.A.
         Dataverse result = (Dataverse) r;
-        List<Dataset> datasets = ctxt.datasets().findByOwnerId(result.getId());
-        ctxt.index().asyncIndexDatasetList(datasets, true);
+
+        if (datasetsReindexRequired) {
+            List<Dataset> datasets = ctxt.datasets().findByOwnerId(result.getId());
+            ctxt.index().asyncIndexDatasetList(datasets, true);
+        }
 
         return ctxt.dataverses().index((Dataverse) r);
     }
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseInputLevelsCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseInputLevelsCommand.java
new file mode 100644
index 00000000000..cf7b4a6f69c
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseInputLevelsCommand.java
@@ -0,0 +1,51 @@
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.DataverseFieldTypeInputLevel;
+import edu.harvard.iq.dataverse.MetadataBlock;
+import edu.harvard.iq.dataverse.authorization.Permission;
+import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
+import edu.harvard.iq.dataverse.engine.command.CommandContext;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+
+import java.util.ArrayList;
+import java.util.List;
+
+@RequiredPermissions(Permission.EditDataverse)
+public class UpdateDataverseInputLevelsCommand extends AbstractCommand<Dataverse> {
+    private final Dataverse dataverse;
+    private final List<DataverseFieldTypeInputLevel> inputLevelList;
+
+    public UpdateDataverseInputLevelsCommand(Dataverse dataverse, DataverseRequest request, List<DataverseFieldTypeInputLevel> inputLevelList) {
+        super(request, dataverse);
+        this.dataverse = dataverse;
+        this.inputLevelList = new ArrayList<>(inputLevelList);
+    }
+
+    @Override
+    public Dataverse execute(CommandContext ctxt) throws CommandException {
+        if (inputLevelList == null || inputLevelList.isEmpty()) {
+            throw new CommandException("Error while updating dataverse input levels: Input level list cannot be null or empty", this);
+        }
+        addInputLevelMetadataBlocks();
+        dataverse.setMetadataBlockRoot(true);
+        return ctxt.engine().submit(new UpdateDataverseCommand(dataverse, null, null, getRequest(), inputLevelList));
+    }
+
+    private void addInputLevelMetadataBlocks() {
+        List<MetadataBlock> dataverseMetadataBlocks = dataverse.getMetadataBlocks();
+        for (DataverseFieldTypeInputLevel inputLevel : inputLevelList) {
+            MetadataBlock inputLevelMetadataBlock = inputLevel.getDatasetFieldType().getMetadataBlock();
+            if (!dataverseHasMetadataBlock(dataverseMetadataBlocks, inputLevelMetadataBlock)) {
+                dataverseMetadataBlocks.add(inputLevelMetadataBlock);
+            }
+        }
+        dataverse.setMetadataBlocks(dataverseMetadataBlocks);
+    }
+
+    private boolean dataverseHasMetadataBlock(List<MetadataBlock> dataverseMetadataBlocks, MetadataBlock metadataBlock) {
+        return dataverseMetadataBlocks.stream().anyMatch(block -> block.getId().equals(metadataBlock.getId()));
+    }
+}
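A short usage sketch for the new command; the engine and variable names are illustrative assumptions, not part of this diff:

    // Illustrative only: replace a collection's input levels. Submitting the
    // command marks the collection as a metadata-block root and delegates
    // persistence to UpdateDataverseCommand, as shown in execute() above.
    Dataverse updated = commandEngine.submit(
            new UpdateDataverseInputLevelsCommand(dataverse, request, inputLevels));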
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java
index 7230f9f9c0a..5bf54ac1ec1 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java
@@ -2,7 +2,6 @@
 
 import edu.harvard.iq.dataverse.DataFile;
 import edu.harvard.iq.dataverse.Dataset;
-import edu.harvard.iq.dataverse.GlobalIdServiceBean;
 import edu.harvard.iq.dataverse.authorization.Permission;
 import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
 import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand;
@@ -11,6 +10,8 @@
 import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
 import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
 import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
+import edu.harvard.iq.dataverse.pidproviders.PidProvider;
+import edu.harvard.iq.dataverse.pidproviders.PidUtil;
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
 import edu.harvard.iq.dataverse.util.BundleUtil;
 import java.sql.Timestamp;
@@ -46,31 +47,31 @@ protected void executeImpl(CommandContext ctxt) throws CommandException {
             //the single dataset update api checks for drafts before calling the command
             return;
         }
-        GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(target.getProtocol(), ctxt);
+        PidProvider pidProvider = PidUtil.getPidProvider(target.getGlobalId().getProviderId());
+
        try {
-            Boolean doiRetString = idServiceBean.publicizeIdentifier(target);
+            Boolean doiRetString = pidProvider.updateIdentifier(target);
             if (doiRetString) {
                 target.setGlobalIdCreateTime(new Timestamp(new Date().getTime()));
                 ctxt.em().merge(target);
                 ctxt.em().flush();
                 // When updating, we want to traverse through files even if the dataset itself
                 // didn't need updating.
-                String currentGlobalIdProtocol = ctxt.settings().getValueForKey(SettingsServiceBean.Key.Protocol, "");
-                String dataFilePIDFormat = ctxt.settings().getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT");
                 boolean isFilePIDsEnabled = ctxt.systemConfig().isFilePIDsEnabledForCollection(target.getOwner());
                 // We will skip trying to update the global identifiers for datafiles if they
                 // aren't being used.
                 // If they are, we need to assure that there's an existing PID or, as when
-                // creating PIDs, that the protocol matches that of the dataset DOI if
-                // we're going to create a DEPENDENT file PID.
-                String protocol = target.getProtocol();
+                // creating PIDs, that it's possible.
+
+                boolean canCreatePidsForFiles = 
+                    isFilePIDsEnabled && ctxt.dvObjects().getEffectivePidGenerator(target).canCreatePidsLike(target.getGlobalId());
+
                 for (DataFile df : target.getFiles()) {
                     if (isFilePIDsEnabled && // using file PIDs and
                         (!(df.getIdentifier() == null || df.getIdentifier().isEmpty()) || // identifier exists, or
-                        currentGlobalIdProtocol.equals(protocol) || // right protocol to create dependent DOIs, or
-                        dataFilePIDFormat.equals("INDEPENDENT"))// or independent. TODO(pm) - check authority too
+                        canCreatePidsForFiles) // we can create PIDs for files
                     ) {
-                        doiRetString = idServiceBean.publicizeIdentifier(df);
+                        doiRetString = pidProvider.updateIdentifier(df);
                         if (doiRetString) {
                             df.setGlobalIdCreateTime(new Timestamp(new Date().getTime()));
                             ctxt.em().merge(df);
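The reworked per-file condition above can be read as a single predicate; note that canCreatePidsForFiles is computed once per dataset, outside the file loop. A restatement in plain Java with the same semantics as the diff:

    // A file PID is updated when file PIDs are enabled for the collection and
    // the file either already has an identifier or a new one could be minted.
    boolean updateFilePid = isFilePIDsEnabled
            && ((df.getIdentifier() != null && !df.getIdentifier().isEmpty())
                || canCreatePidsForFiles);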
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java
new file mode 100644
index 00000000000..619740ddd89
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java
@@ -0,0 +1,41 @@
+
+package edu.harvard.iq.dataverse.engine.command.impl;
+
+import edu.harvard.iq.dataverse.DataFile;
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.authorization.Permission;
+import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
+import edu.harvard.iq.dataverse.engine.command.CommandContext;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
+import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
+
+
+import java.util.logging.Logger;
+
+/**
+ *
+ * @author stephenkraffmiller
+ */
+@RequiredPermissions(Permission.AddDataset)
+public class ValidateDatasetJsonCommand extends AbstractCommand<String> {
+
+    private static final Logger logger = Logger.getLogger(ValidateDatasetJsonCommand.class.getCanonicalName());
+
+    private final Dataverse dataverse;
+    private final String datasetJson;
+
+    public ValidateDatasetJsonCommand(DataverseRequest aRequest, Dataverse target, String datasetJsonIn) {
+        super(aRequest, target);
+        dataverse = target;
+        datasetJson = datasetJsonIn;
+    }
+
+    @Override
+    public String execute(CommandContext ctxt) throws CommandException {
+
+        return ctxt.dataverses().isDatasetJsonValid(dataverse.getAlias(), datasetJson);
+
+    }
+}
diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java
index 5119b4b96c7..edd01ae98a3 100644
--- a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java
@@ -545,6 +545,16 @@ private void createDataFileDDI(XMLStreamWriter xmlw, Set<String> excludedFieldSe
         List<DataVariable> vars = variableService.findByDataTableId(dt.getId());
         if (checkField("catgry", excludedFieldSet, includedFieldSet)) {
             if (checkIsWithoutFrequencies(vars)) {
+                // @todo: the method called here to calculate frequencies
+                // when they are missing from the database (for whatever
+                // reasons) subsets the physical tab-delimited file and
+                // calculates them in real time. this is potentially a very
+                // expensive operation. let's make sure that, when we do this,
+                // we save the resulting frequencies in the database, so that
+                // we don't have to do this again. Also, let's double check
+                // whether the "checkIsWithoutFrequencies()" method is doing
+                // the right thing - as it appears to return true when there
+                // are no categorical variables in the DataTable (?)
calculateFrequencies(df, vars); } } @@ -580,6 +590,7 @@ private boolean checkIsWithoutFrequencies(List vars) { private void calculateFrequencies(DataFile df, List vars) { + // @todo: see the comment in the part of the code that calls this method try { DataConverter dc = new DataConverter(); File tabFile = dc.downloadFromStorageIO(df.getStorageIO()); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java b/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java index a7967f6ccb6..f0d77eb8b52 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/InternalExportDataProvider.java @@ -8,12 +8,11 @@ import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; - -import edu.harvard.iq.dataverse.DOIDataCiteRegisterService; import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DOIDataCiteRegisterService; import io.gdcc.spi.export.ExportDataProvider; import edu.harvard.iq.dataverse.util.bagit.OREMap; import edu.harvard.iq.dataverse.util.json.JsonPrinter; diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 1b61a9c9970..9a689f7a4ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -1818,11 +1818,13 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // labl if ((vm == null || !vm.containsKey("label"))) { - xmlw.writeStartElement("labl"); - writeAttribute(xmlw, "level", "variable"); - xmlw.writeCharacters(dvar.getString("label")); - xmlw.writeEndElement(); //labl - } else if (vm != null && vm.containsKey("label")) { + if(dvar.containsKey("label")) { + xmlw.writeStartElement("labl"); + writeAttribute(xmlw, "level", "variable"); + xmlw.writeCharacters(dvar.getString("label")); + xmlw.writeEndElement(); //labl + } + } else { xmlw.writeStartElement("labl"); writeAttribute(xmlw, "level", "variable"); xmlw.writeCharacters(vm.getString("label")); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 7b0a92a4372..4b8822e8b66 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -13,16 +13,16 @@ import com.google.gson.Gson; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.HandlenetServiceBean; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.util.json.JsonUtil; import 
java.util.regex.Matcher; import java.util.regex.Pattern; @@ -189,10 +189,10 @@ public static void writeIdentifierElement(XMLStreamWriter xmlw, String identifie if (StringUtils.isNotBlank(identifier)) { Map identifier_map = new HashMap(); - if (StringUtils.containsIgnoreCase(identifier, DOIServiceBean.DOI_RESOLVER_URL)) { + if (StringUtils.containsIgnoreCase(identifier, AbstractDOIProvider.DOI_RESOLVER_URL)) { identifier_map.put("identifierType", "DOI"); identifier = StringUtils.substring(identifier, identifier.indexOf("10.")); - } else if (StringUtils.containsIgnoreCase(identifier, HandlenetServiceBean.HDL_RESOLVER_URL)) { + } else if (StringUtils.containsIgnoreCase(identifier, HandlePidProvider.HDL_RESOLVER_URL)) { identifier_map.put("identifierType", "Handle"); if (StringUtils.contains(identifier, "http")) { identifier = identifier.replace(identifier.substring(0, identifier.indexOf("/") + 2), ""); @@ -437,7 +437,7 @@ public static void writeSubjectsElement(XMLStreamWriter xmlw, DatasetVersionDTO for (String subject : fieldDTO.getMultipleVocab()) { if (StringUtils.isNotBlank(subject)) { subject_check = writeOpenTag(xmlw, "subjects", subject_check); - writeSubjectElement(xmlw, null, null, subject, language); + writeSubjectElement(xmlw, null, null, null, subject, language); } } } @@ -446,7 +446,8 @@ public static void writeSubjectsElement(XMLStreamWriter xmlw, DatasetVersionDTO for (HashSet fieldDTOs : fieldDTO.getMultipleCompound()) { String subject = null; String subjectScheme = null; - String schemeURI = null; + String keywordTermURI = null; + String keywordVocabURI = null; for (Iterator iterator = fieldDTOs.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); @@ -454,18 +455,22 @@ public static void writeSubjectsElement(XMLStreamWriter xmlw, DatasetVersionDTO subject = next.getSinglePrimitive(); } + if (DatasetFieldConstant.keywordTermURI.equals(next.getTypeName())) { + keywordTermURI = next.getSinglePrimitive(); + } + if (DatasetFieldConstant.keywordVocab.equals(next.getTypeName())) { subjectScheme = next.getSinglePrimitive(); } - + if (DatasetFieldConstant.keywordVocabURI.equals(next.getTypeName())) { - schemeURI = next.getSinglePrimitive(); + keywordVocabURI = next.getSinglePrimitive(); } } if (StringUtils.isNotBlank(subject)) { subject_check = writeOpenTag(xmlw, "subjects", subject_check); - writeSubjectElement(xmlw, subjectScheme, schemeURI, subject, language); + writeSubjectElement(xmlw, subjectScheme, keywordTermURI, keywordVocabURI, subject, language); } } } @@ -493,7 +498,7 @@ public static void writeSubjectsElement(XMLStreamWriter xmlw, DatasetVersionDTO if (StringUtils.isNotBlank(subject)) { subject_check = writeOpenTag(xmlw, "subjects", subject_check); - writeSubjectElement(xmlw, subjectScheme, schemeURI, subject, language); + writeSubjectElement(xmlw, subjectScheme, null, schemeURI, subject, language); } } } @@ -513,7 +518,7 @@ public static void writeSubjectsElement(XMLStreamWriter xmlw, DatasetVersionDTO * @param language * @throws XMLStreamException */ - private static void writeSubjectElement(XMLStreamWriter xmlw, String subjectScheme, String schemeURI, String value, String language) throws XMLStreamException { + private static void writeSubjectElement(XMLStreamWriter xmlw, String subjectScheme, String valueURI, String schemeURI, String value, String language) throws XMLStreamException { // write a subject Map subject_map = new HashMap(); @@ -524,6 +529,9 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, String subjectSche if 
(StringUtils.isNotBlank(subjectScheme)) { subject_map.put("subjectScheme", subjectScheme); } + if (StringUtils.isNotBlank(valueURI)) { + subject_map.put("valueURI", valueURI); + } if (StringUtils.isNotBlank(schemeURI)) { subject_map.put("schemeURI", schemeURI); } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java index 7f1f46c06cb..c103f6981e1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalTool.java @@ -40,6 +40,7 @@ public class ExternalTool implements Serializable { public static final String TOOL_NAME = "toolName"; public static final String ALLOWED_API_CALLS = "allowedApiCalls"; public static final String REQUIREMENTS = "requirements"; + public static final String AUX_FILES_EXIST = "auxFilesExist"; @Id @GeneratedValue(strategy = GenerationType.IDENTITY) @@ -362,4 +363,9 @@ public void setRequirements(String requirements) { this.requirements = requirements; } + public boolean accessesAuxFiles() { + String reqs = getRequirements(); + return reqs!=null && reqs.contains(AUX_FILES_EXIST); + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java index cdde9fbe0e8..e7ae451cacf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandler.java @@ -22,12 +22,8 @@ import java.util.logging.Level; import java.util.logging.Logger; -import jakarta.json.Json; -import jakarta.json.JsonArray; -import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonNumber; import jakarta.json.JsonObject; -import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonString; import jakarta.json.JsonValue; import jakarta.ws.rs.HttpMethod; @@ -43,15 +39,10 @@ */ public class ExternalToolHandler extends URLTokenUtil { - private final ExternalTool externalTool; + public final ExternalTool externalTool; private String requestMethod; - - public static final String HTTP_METHOD="httpMethod"; - public static final String TIMEOUT="timeOut"; - public static final String SIGNED_URL="signedUrl"; - public static final String NAME="name"; - public static final String URL_TEMPLATE="urlTemplate"; + /** @@ -136,12 +127,12 @@ public String handleRequest(boolean preview) { } else { // ToDo - if the allowedApiCalls() are defined, could/should we send them to - // tools using GET as well? + // tools using POST as well? if (requestMethod.equals(HttpMethod.POST)) { - String body = JsonUtil.prettyPrint(createPostBody(params).build()); + String body = JsonUtil.prettyPrint(createPostBody(params, null).build()); try { - logger.info("POST Body: " + body); + logger.fine("POST Body: " + body); return postFormData(body); } catch (IOException | InterruptedException ex) { Logger.getLogger(ExternalToolHandler.class.getName()).log(Level.SEVERE, null, ex); @@ -151,60 +142,6 @@ public String handleRequest(boolean preview) { return null; } - public JsonObject getParams(JsonObject toolParameters) { - //ToDo - why an array of object each with a single key/value pair instead of one object? 
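// Editor's note: a minimal, self-contained sketch of the structure the ToDo above asks about.
// External tools deliver "queryParameters" as an array of single-pair objects, e.g.
// [{"fileid":"{fileId}"},{"locale":"{localeCode}"}], and the removed getParams() (which,
// judging by the handleRequest() call to createPostBody(params, null) above, presumably now
// lives in the URLTokenUtil superclass) flattens them into one JsonObject. The class and
// method names below are illustrative only, not part of the Dataverse API.
import jakarta.json.Json;
import jakarta.json.JsonArray;
import jakarta.json.JsonObject;
import jakarta.json.JsonObjectBuilder;

class QueryParamSketch {
    static JsonObject flatten(JsonArray queryParams) {
        JsonObjectBuilder flat = Json.createObjectBuilder();
        // Each array element carries exactly one key/value pair; copy every pair over.
        queryParams.getValuesAs(JsonObject.class)
                .forEach(pair -> pair.keySet().forEach(key -> flat.add(key, pair.get(key))));
        return flat.build();
    }

    public static void main(String[] args) {
        JsonArray params = Json.createArrayBuilder()
                .add(Json.createObjectBuilder().add("fileid", "42"))
                .add(Json.createObjectBuilder().add("locale", "en"))
                .build();
        System.out.println(flatten(params)); // {"fileid":"42","locale":"en"}
    }
}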
- JsonArray queryParams = toolParameters.getJsonArray("queryParameters"); - - // ToDo return json and print later - JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); - if (!(queryParams == null) && !queryParams.isEmpty()) { - queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { - queryParam.keySet().forEach((key) -> { - String value = queryParam.getString(key); - JsonValue param = getParam(value); - if (param != null) { - paramsBuilder.add(key, param); - } - }); - }); - } - return paramsBuilder.build(); - } - - public JsonObjectBuilder createPostBody(JsonObject params) { - JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); - bodyBuilder.add("queryParameters", params); - String apiCallStr = externalTool.getAllowedApiCalls(); - if (apiCallStr != null && !apiCallStr.isBlank()) { - JsonArray apiArray = JsonUtil.getJsonArray(externalTool.getAllowedApiCalls()); - JsonArrayBuilder apisBuilder = Json.createArrayBuilder(); - apiArray.getValuesAs(JsonObject.class).forEach(((apiObj) -> { - logger.fine(JsonUtil.prettyPrint(apiObj)); - String name = apiObj.getJsonString(NAME).getString(); - String httpmethod = apiObj.getJsonString(HTTP_METHOD).getString(); - int timeout = apiObj.getInt(TIMEOUT); - String urlTemplate = apiObj.getJsonString(URL_TEMPLATE).getString(); - logger.fine("URL Template: " + urlTemplate); - urlTemplate = SystemConfig.getDataverseSiteUrlStatic() + urlTemplate; - String apiPath = replaceTokensWithValues(urlTemplate); - logger.fine("URL WithTokens: " + apiPath); - String url = apiPath; - // Sign if apiToken exists, otherwise send unsigned URL (i.e. for guest users) - ApiToken apiToken = getApiToken(); - if (apiToken != null) { - url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(), - httpmethod, JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") - + getApiToken().getTokenString()); - } - logger.fine("Signed URL: " + url); - apisBuilder.add(Json.createObjectBuilder().add(NAME, name).add(HTTP_METHOD, httpmethod) - .add(SIGNED_URL, url).add(TIMEOUT, timeout)); - })); - bodyBuilder.add("signedUrls", apisBuilder); - } - return bodyBuilder; - } - private String postFormData(String allowedApis) throws IOException, InterruptedException { String url = null; HttpClient client = HttpClient.newHttpClient(); diff --git a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java index e13843eadfa..5ee0bf1355d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBean.java @@ -5,6 +5,8 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.externaltools.ExternalTool.Type; import edu.harvard.iq.dataverse.util.URLTokenUtil.ReservedWord; import edu.harvard.iq.dataverse.util.json.JsonUtil; @@ -141,9 +143,10 @@ public List findExternalToolsByFile(List allExternal List externalTools = new ArrayList<>(); //Map tabular data to it's mimetype (the isTabularData() check assures that this code works the same as before, but it may need to change if tabular data is split into subtypes with differing mimetypes) final String contentType = file.isTabularData() 
? DataFileServiceBean.MIME_TYPE_TSV_ALT : file.getContentType(); + boolean isAccessible = StorageIO.isDataverseAccessible(DataAccess.getStorageDriverFromIdentifier(file.getStorageIdentifier())); allExternalTools.forEach((externalTool) -> { //Match tool and file type, then check requirements - if (contentType.equals(externalTool.getContentType()) && meetsRequirements(externalTool, file)) { + if (contentType.equals(externalTool.getContentType()) && meetsRequirements(externalTool, file) && (isAccessible || externalTool.accessesAuxFiles())) { externalTools.add(externalTool); } }); @@ -159,7 +162,7 @@ public boolean meetsRequirements(ExternalTool externalTool, DataFile dataFile) { } boolean meetsRequirements = true; JsonObject requirementsObj = JsonUtil.getJsonObject(requirements); - JsonArray auxFilesExist = requirementsObj.getJsonArray("auxFilesExist"); + JsonArray auxFilesExist = requirementsObj.getJsonArray(ExternalTool.AUX_FILES_EXIST); for (JsonValue jsonValue : auxFilesExist) { String formatTag = jsonValue.asJsonObject().getString("formatTag"); String formatVersion = jsonValue.asJsonObject().getString("formatVersion"); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java index 877fc68e4a1..c93e2c6aa94 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/AccessToken.java @@ -46,7 +46,7 @@ String getRefreshToken() { return refreshToken; } - ArrayList getOtherTokens() { + public ArrayList getOtherTokens() { return otherTokens; } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java new file mode 100644 index 00000000000..7e555935e2e --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusEndpoint.java @@ -0,0 +1,38 @@ +package edu.harvard.iq.dataverse.globus; + +public class GlobusEndpoint { + + private String id; + private String clientToken; + private String basePath; + + public GlobusEndpoint(String id, String clientToken, String basePath) { + this.id = id; + this.clientToken = clientToken; + this.basePath = basePath; + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getClientToken() { + return clientToken; + } + + public void setClientToken(String clientToken) { + this.clientToken = clientToken; + } + + public String getBasePath() { + return basePath; + } + + public void setBasePath(String basePath) { + this.basePath = basePath; + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index d6943ec3511..fb50214c259 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -1,9 +1,11 @@ package edu.harvard.iq.dataverse.globus; +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.Scheduler; import com.google.gson.FieldNamingPolicy; import com.google.gson.GsonBuilder; import edu.harvard.iq.dataverse.*; - import jakarta.ejb.Asynchronous; import jakarta.ejb.EJB; import jakarta.ejb.Stateless; @@ -15,8 +17,13 @@ import jakarta.json.JsonArray; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; +import 
jakarta.json.JsonObjectBuilder; import jakarta.json.JsonPatch; +import jakarta.json.JsonString; +import jakarta.json.JsonValue.ValueType; +import jakarta.json.stream.JsonParsingException; import jakarta.servlet.http.HttpServletRequest; +import jakarta.ws.rs.HttpMethod; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.toJsonArray; @@ -29,6 +36,8 @@ import java.net.URLEncoder; import java.sql.Timestamp; import java.text.SimpleDateFormat; +import java.time.Duration; +import java.time.temporal.ChronoUnit; import java.util.*; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; @@ -40,17 +49,26 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import org.apache.commons.codec.binary.StringUtils; +import org.primefaces.PrimeFaces; + import com.google.gson.Gson; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.privateurl.PrivateUrl; +import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; +import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; @Stateless @@ -59,197 +77,313 @@ public class GlobusServiceBean implements java.io.Serializable { @EJB protected DatasetServiceBean datasetSvc; - @EJB protected SettingsServiceBean settingsSvc; - @Inject DataverseSession session; - @EJB protected AuthenticationServiceBean authSvc; - @EJB EjbDataverseEngine commandEngine; - @EJB UserNotificationServiceBean userNotificationService; + @EJB + PrivateUrlServiceBean privateUrlService; + @EJB + FileDownloadServiceBean fileDownloadService; + @EJB + DataFileServiceBean dataFileService; private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); - private String code; - private String userTransferToken; - private String state; - - public String getState() { - return state; - } - - public void setState(String state) { - this.state = state; - } - - public String getCode() { - return code; - } - - public void setCode(String code) { - this.code = code; - } - - public String getUserTransferToken() { - return userTransferToken; - } + private String getRuleId(GlobusEndpoint endpoint, String principal, String permissions) + throws MalformedURLException { - public void setUserTransferToken(String userTransferToken) { - this.userTransferToken = userTransferToken; - } + String principalType = "identity"; - ArrayList checkPermisions(AccessToken clientTokenUser, String directory, String globusEndpoint, - String principalType, String principal) throws MalformedURLException { - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" 
+ globusEndpoint + "/access_list"); - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); - ArrayList ids = new ArrayList(); + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access_list"); + MakeRequestResponse result = makeRequest(url, "Bearer", endpoint.getClientToken(), "GET", null); if (result.status == 200) { AccessList al = parseJson(result.jsonResponse, AccessList.class, false); for (int i = 0; i < al.getDATA().size(); i++) { Permissions pr = al.getDATA().get(i); - if ((pr.getPath().equals(directory + "/") || pr.getPath().equals(directory)) + + if ((pr.getPath().equals(endpoint.getBasePath() + "/") || pr.getPath().equals(endpoint.getBasePath())) && pr.getPrincipalType().equals(principalType) - && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal)))) { - ids.add(pr.getId()); + && ((principal == null) || (principal != null && pr.getPrincipal().equals(principal))) + && pr.getPermissions().equals(permissions)) { + return pr.getId(); } else { - logger.info(pr.getPath() + " === " + directory + " == " + pr.getPrincipalType()); + logger.fine(pr.getPath() + " === " + endpoint.getBasePath() + " == " + pr.getPrincipalType()); continue; } } } - - return ids; + return null; } - public void updatePermision(AccessToken clientTokenUser, String directory, String principalType, String perm) - throws MalformedURLException { - if (directory != null && !directory.equals("")) { - directory = directory + "/"; - } - logger.info("Start updating permissions." + " Directory is " + directory); - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - ArrayList rules = checkPermisions(clientTokenUser, directory, globusEndpoint, principalType, null); - logger.info("Size of rules " + rules.size()); - int count = 0; - while (count < rules.size()) { - logger.info("Start removing rules " + rules.get(count)); - Permissions permissions = new Permissions(); - permissions.setDATA_TYPE("access"); - permissions.setPermissions(perm); - permissions.setPath(directory); - - Gson gson = new GsonBuilder().create(); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" - + rules.get(count)); - logger.info("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" - + rules.get(count)); - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", gson.toJson(permissions)); - if (result.status != 200) { - logger.warning("Cannot update access rule " + rules.get(count)); - } else { - logger.info("Access rule " + rules.get(count) + " was updated"); - } - count++; - } - } - - public void deletePermision(String ruleId, Logger globusLogger) throws MalformedURLException { - + /** + * Call to delete a globus rule related to the specified dataset. 
+ * + * @param ruleId - Globus rule id - assumed to be associated with the + * dataset's file path (should not be called with a user + * specified rule id without further checking) + * @param dataset - the dataset associated with the rule + * @param globusLogger - the logger to write to (either a transfer-specific logger or the class logger) + */ + public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger) { + globusLogger.fine("Start deleting rule " + ruleId + " for dataset " + dataset.getId()); if (ruleId.length() > 0) { - AccessToken clientTokenUser = getClientToken(); - globusLogger.info("Start deleting permissions."); - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - - URL url = new URL( - "https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" + ruleId); - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "DELETE", null); - if (result.status != 200) { - globusLogger.warning("Cannot delete access rule " + ruleId); - } else { - globusLogger.info("Access rule " + ruleId + " was deleted successfully"); + if (dataset != null) { + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + if (endpoint != null) { + String accessToken = endpoint.getClientToken(); + globusLogger.info("Start deleting permissions."); + try { + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + + "/access/" + ruleId); + MakeRequestResponse result = makeRequest(url, "Bearer", accessToken, "DELETE", null); + if (result.status != 200) { + globusLogger.warning("Cannot delete access rule " + ruleId); + } else { + globusLogger.info("Access rule " + ruleId + " was deleted successfully"); + } + } catch (MalformedURLException ex) { + logger.log(Level.WARNING, + "Failed to delete access rule " + ruleId + " on endpoint " + endpoint.getId(), ex); + } + } } } - } - public int givePermission(String principalType, String principal, String perm, AccessToken clientTokenUser, - String directory, String globusEndpoint) throws MalformedURLException { + /** + * Request read/write access for the specified principal and generate a list of + * accessible paths for new files for the specified dataset.
+ * + * @param principal - the id of the Globus principal doing the transfer + * @param dataset + * @param numberOfPaths - how many files are to be transferred + * @return + */ + public JsonObject requestAccessiblePaths(String principal, Dataset dataset, int numberOfPaths) { - ArrayList rules = checkPermisions(clientTokenUser, directory, globusEndpoint, principalType, principal); + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + String principalType = "identity"; Permissions permissions = new Permissions(); permissions.setDATA_TYPE("access"); permissions.setPrincipalType(principalType); permissions.setPrincipal(principal); - permissions.setPath(directory + "/"); - permissions.setPermissions(perm); + permissions.setPath(endpoint.getBasePath() + "/"); + permissions.setPermissions("rw"); + + JsonObjectBuilder response = Json.createObjectBuilder(); + //Try to create the directory (202 status) if it does not exist (502-already exists) + int mkDirStatus = makeDirs(endpoint, dataset); + if (!(mkDirStatus== 202 || mkDirStatus == 502)) { + return response.add("status", mkDirStatus).build(); + } + //The dir for the dataset's data exists, so try to request permission for the principal + int requestPermStatus = requestPermission(endpoint, dataset, permissions); + response.add("status", requestPermStatus); + if (requestPermStatus == 201) { + String driverId = dataset.getEffectiveStorageDriverId(); + JsonObjectBuilder paths = Json.createObjectBuilder(); + for (int i = 0; i < numberOfPaths; i++) { + String storageIdentifier = DataAccess.getNewStorageIdentifier(driverId); + int lastIndex = Math.max(storageIdentifier.lastIndexOf("/"), storageIdentifier.lastIndexOf(":")); + paths.add(storageIdentifier, endpoint.getBasePath() + "/" + storageIdentifier.substring(lastIndex + 1)); + } + response.add("paths", paths.build()); + } + return response.build(); + } + + /** + * Call to create the directories for the specified dataset. + * + * @param dataset + * @return - an error status at whichever subdir the process fails at or the + * final success status + */ + private int makeDirs(GlobusEndpoint endpoint, Dataset dataset) { + logger.fine("Creating dirs: " + endpoint.getBasePath()); + int index = endpoint.getBasePath().lastIndexOf(dataset.getAuthorityForFileStorage()) + + dataset.getAuthorityForFileStorage().length(); + String nextDir = endpoint.getBasePath().substring(0, index); + int response = makeDir(endpoint, nextDir); + String identifier = dataset.getIdentifierForFileStorage(); + //Usually identifiers will have 0 or 1 slashes (e.g. 
FK2/ABCDEF) but the while loop will handle any that could have more + //Will skip if the first makeDir above failed + while ((identifier.length() > 0) && ((response == 202 || response == 502))) { + index = identifier.indexOf('/'); + if (index == -1) { + //Last dir to create + response = makeDir(endpoint, nextDir + "/" + identifier); + identifier = ""; + } else { + //The next dir to create + nextDir = nextDir + "/" + identifier.substring(0, index); + response = makeDir(endpoint, nextDir); + //The rest of the identifier + identifier = identifier.substring(index + 1); + } + } + return response; + } + + private int makeDir(GlobusEndpoint endpoint, String dir) { + MakeRequestResponse result = null; + String body = "{\"DATA_TYPE\":\"mkdir\",\"path\":\"" + dir + "\"}"; + try { + logger.fine(body); + URL url = new URL( + "https://transfer.api.globusonline.org/v0.10/operation/endpoint/" + endpoint.getId() + "/mkdir"); + result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", body); + + switch (result.status) { + case 202: + logger.fine("Dir " + dir + " was created successfully."); + break; + case 502: + logger.fine("Dir " + dir + " already exists."); + break; + default: + logger.warning("Status " + result.status + " received when creating dir " + dir); + logger.fine("Response: " + result.jsonResponse); + } + } catch (MalformedURLException ex) { + // Misconfiguration + logger.warning("Failed to create dir on " + endpoint.getId()); + return 500; + } + return result.status; + } + + private int requestPermission(GlobusEndpoint endpoint, Dataset dataset, Permissions permissions) { Gson gson = new GsonBuilder().create(); MakeRequestResponse result = null; - if (rules.size() == 0) { - logger.info("Start creating the rule"); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access"); - result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "POST", - gson.toJson(permissions)); + logger.fine("Start creating the rule"); - if (result.status == 400) { + try { + URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + endpoint.getId() + "/access"); + result = makeRequest(url, "Bearer", endpoint.getClientToken(), "POST", gson.toJson(permissions)); + + switch (result.status) { + case 404: + logger.severe("Endpoint " + endpoint.getId() + " was not found"); + break; + case 400: logger.severe("Path " + permissions.getPath() + " is not valid"); - } else if (result.status == 409) { + break; + case 409: logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); + break; + case 201: + JsonObject globusResponse = JsonUtil.getJsonObject(result.jsonResponse); + if (globusResponse != null && globusResponse.containsKey("access_id")) { + permissions.setId(globusResponse.getString("access_id")); + monitorTemporaryPermissions(permissions.getId(), dataset.getId()); + logger.fine("Access rule " + permissions.getId() + " was created successfully"); + } else { + // Shouldn't happen! 
+ logger.warning("Access rule id not returned for dataset " + dataset.getId()); + } } - return result.status; - } else { - logger.info("Start Updating the rule"); - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/access/" - + rules.get(0)); - result = makeRequest(url, "Bearer", clientTokenUser.getOtherTokens().get(0).getAccessToken(), "PUT", - gson.toJson(permissions)); - - if (result.status == 400) { - logger.severe("Path " + permissions.getPath() + " is not valid"); - } else if (result.status == 409) { - logger.warning("ACL already exists or Endpoint ACL already has the maximum number of access rules"); - } - logger.info("Result status " + result.status); + } catch (MalformedURLException ex) { + // Misconfiguration + logger.warning("Failed to create access rule URL for " + endpoint.getId()); + return 500; } - - return result.status; } - public boolean getSuccessfulTransfers(AccessToken clientTokenUser, String taskId) throws MalformedURLException { - - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId - + "/successful_transfers"); - - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + /** + * Given an array of remote files to be referenced in the dataset, create a set + * of valid storage identifiers and return a map of the remote file paths to + * storage identifiers. + * + * @param dataset + * @param referencedFiles - a JSON array of remote files to be referenced in the + * dataset - each should be a string with the /path/to/file + * @return - a map of supplied paths to valid storage identifiers + */ + public JsonObject requestReferenceFileIdentifiers(Dataset dataset, JsonArray referencedFiles) { + String driverId = dataset.getEffectiveStorageDriverId(); + JsonArray endpoints = GlobusAccessibleStore.getReferenceEndpointsWithPaths(driverId); + + JsonObjectBuilder fileMap = Json.createObjectBuilder(); + referencedFiles.forEach(value -> { + if (value.getValueType() != ValueType.STRING) { + throw new JsonParsingException("ReferencedFiles must be strings", null); + } + String referencedFile = ((JsonString) value).getString(); + boolean valid = false; + for (int i = 0; i < endpoints.size(); i++) { + if (referencedFile.startsWith(((JsonString) endpoints.get(i)).getString())) { + valid = true; + } + } + if (!valid) { + throw new IllegalArgumentException( + "Referenced file " + referencedFile + " is not in an allowed endpoint/path"); + } + String storageIdentifier = DataAccess.getNewStorageIdentifier(driverId); + fileMap.add(referencedFile, storageIdentifier + "//" + referencedFile); + }); + return fileMap.build(); + } - if (result.status == 200) { - logger.info(" SUCCESS ====== "); - return true; - } - return false; + /** + * A cache of temporary permission requests - for upload (rw) and download (r) + * access. When a temporary permission request is created, it is added to the + * cache. After GLOBUS_CACHE_MAXAGE minutes, if a transfer has not been started, + * the permission will be revoked/deleted. (If a transfer has been started, the + * permission will not be revoked/deleted until the transfer is complete. This + * is handled in other methods.) + */ + // ToDo - nominally this doesn't need to be as long as the allowed time for the + // downloadCache so there could be two separate settings. 
+ // Single cache of open rules/permission requests + private final Cache rulesCache = Caffeine.newBuilder() + .expireAfterWrite(Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class), ChronoUnit.MINUTES)) + .scheduler(Scheduler.systemScheduler()).evictionListener((ruleId, datasetId, cause) -> { + // Delete rules that expire + logger.fine("Rule " + ruleId + " expired"); + Dataset dataset = datasetSvc.find(datasetId); + deletePermission((String) ruleId, dataset, logger); + }) + + .build(); + + // Convenience method to add a temporary permission request to the cache - + // allows logging of temporary permission requests + private void monitorTemporaryPermissions(String ruleId, long datasetId) { + logger.fine("Adding rule " + ruleId + " for dataset " + datasetId); + rulesCache.put(ruleId, datasetId); } - public GlobusTask getTask(AccessToken clientTokenUser, String taskId, Logger globusLogger) throws MalformedURLException { + /** + * Call the Globus API to get info about the transfer. + * + * @param accessToken + * @param taskId - the Globus task id supplied by the user + * @param globusLogger - the transaction-specific logger to use (separate log + * files are created in general, some calls may use the + * class logger) + * @return + * @throws MalformedURLException + */ + public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger) throws MalformedURLException { URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); + MakeRequestResponse result = makeRequest(url, "Bearer", accessToken, "GET", null); GlobusTask task = null; @@ -264,49 +398,34 @@ public GlobusTask getTask(AccessToken clientTokenUser, String taskId, Logger glo return task; } - public AccessToken getClientToken() throws MalformedURLException { - String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - URL url = new URL( - "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); - - MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); + /** + * Globus call to get an access token for the user using the long-term token we + * hold. 
+ * + * @param globusBasicToken - the base64 encoded Globus Basic token comprised of + * the client_id:client_secret pair + * @return - a valid Globus access token + */ + public static AccessToken getClientToken(String globusBasicToken) { + URL url; AccessToken clientTokenUser = null; - if (result.status == 200) { - clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); - } - return clientTokenUser; - } - - public AccessToken getAccessToken(HttpServletRequest origRequest, String globusBasicToken) - throws UnsupportedEncodingException, MalformedURLException { - String serverName = origRequest.getServerName(); - if (serverName.equals("localhost")) { - logger.severe("Changing localhost to utoronto"); - serverName = "utl-192-123.library.utoronto.ca"; - } - String redirectURL = "https://" + serverName + "/globus.xhtml"; - - redirectURL = URLEncoder.encode(redirectURL, "UTF-8"); - - URL url = new URL("https://auth.globus.org/v2/oauth2/token?code=" + code + "&redirect_uri=" + redirectURL - + "&grant_type=authorization_code"); - logger.info(url.toString()); - - MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); - AccessToken accessTokenUser = null; + try { + url = new URL( + "https://auth.globus.org/v2/oauth2/token?scope=openid+email+profile+urn:globus:auth:scope:transfer.api.globus.org:all&grant_type=client_credentials"); - if (result.status == 200) { - logger.info("Access Token: \n" + result.toString()); - accessTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); - logger.info(accessTokenUser.getAccessToken()); + MakeRequestResponse result = makeRequest(url, "Basic", globusBasicToken, "POST", null); + if (result.status == 200) { + clientTokenUser = parseJson(result.jsonResponse, AccessToken.class, true); + } + } catch (MalformedURLException e) { + // On a statically defined URL...
+ e.printStackTrace(); } - - return accessTokenUser; - + return clientTokenUser; } - public MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, + private static MakeRequestResponse makeRequest(URL url, String authType, String authCode, String method, String jsonString) { String str = null; HttpURLConnection connection = null; @@ -314,8 +433,7 @@ public MakeRequestResponse makeRequest(URL url, String authType, String authCode try { connection = (HttpURLConnection) url.openConnection(); // Basic - // NThjMGYxNDQtN2QzMy00ZTYzLTk3MmUtMjljNjY5YzJjNGJiOktzSUVDMDZtTUxlRHNKTDBsTmRibXBIbjZvaWpQNGkwWVVuRmQyVDZRSnc9 - logger.info(authType + " " + authCode); + logger.fine("For URL: " + url.toString()); connection.setRequestProperty("Authorization", authType + " " + authCode); // connection.setRequestProperty("Content-Type", // "application/x-www-form-urlencoded"); @@ -323,32 +441,30 @@ public MakeRequestResponse makeRequest(URL url, String authType, String authCode if (jsonString != null) { connection.setRequestProperty("Content-Type", "application/json"); connection.setRequestProperty("Accept", "application/json"); - logger.info(jsonString); + logger.fine(jsonString); connection.setDoOutput(true); + OutputStreamWriter wr = new OutputStreamWriter(connection.getOutputStream()); wr.write(jsonString); wr.flush(); } status = connection.getResponseCode(); - logger.info("Status now " + status); + logger.fine("Status now " + status); InputStream result = connection.getInputStream(); if (result != null) { - logger.info("Result is not null"); str = readResultJson(result).toString(); - logger.info("str is "); - logger.info(result.toString()); + logger.fine("str is " + result.toString()); } else { - logger.info("Result is null"); + logger.fine("Result is null"); str = null; } - logger.info("status: " + status); + logger.fine("status: " + status); } catch (IOException ex) { - logger.info("IO"); logger.severe(ex.getMessage()); - logger.info(ex.getCause().toString()); - logger.info(ex.getStackTrace().toString()); + logger.fine(ex.getCause().toString()); + logger.fine(ex.getStackTrace().toString()); } finally { if (connection != null) { connection.disconnect(); @@ -359,18 +475,16 @@ public MakeRequestResponse makeRequest(URL url, String authType, String authCode } - private StringBuilder readResultJson(InputStream in) { + private static StringBuilder readResultJson(InputStream in) { StringBuilder sb = null; - try { - - BufferedReader br = new BufferedReader(new InputStreamReader(in)); + try (BufferedReader br = new BufferedReader(new InputStreamReader(in))) { sb = new StringBuilder(); String line; while ((line = br.readLine()) != null) { sb.append(line + "\n"); } br.close(); - logger.info(sb.toString()); + logger.fine(sb.toString()); } catch (IOException e) { sb = null; logger.severe(e.getMessage()); @@ -378,7 +492,7 @@ private StringBuilder readResultJson(InputStream in) { return sb; } - private T parseJson(String sb, Class jsonParserClass, boolean namingPolicy) { + private static T parseJson(String sb, Class jsonParserClass, boolean namingPolicy) { if (sb != null) { Gson gson = null; if (namingPolicy) { @@ -395,32 +509,7 @@ private T parseJson(String sb, Class jsonParserClass, boolean namingPolic } } - public String getDirectory(String datasetId) { - Dataset dataset = null; - String directory = null; - try { - dataset = datasetSvc.find(Long.parseLong(datasetId)); - if (dataset == null) { - logger.severe("Dataset not found " + datasetId); - return null; - } - String storeId = 
dataset.getStorageIdentifier(); - storeId.substring(storeId.indexOf("//") + 1); - directory = storeId.substring(storeId.indexOf("//") + 1); - logger.info(storeId); - logger.info(directory); - logger.info("Storage identifier:" + dataset.getIdentifierForFileStorage()); - return directory; - - } catch (NumberFormatException nfe) { - logger.severe(nfe.getMessage()); - - return null; - } - - } - - class MakeRequestResponse { + static class MakeRequestResponse { public String jsonResponse; public int status; @@ -431,81 +520,61 @@ class MakeRequestResponse { } - private MakeRequestResponse findDirectory(String directory, AccessToken clientTokenUser, String globusEndpoint) - throws MalformedURLException { - URL url = new URL(" https://transfer.api.globusonline.org/v0.10/endpoint/" + globusEndpoint + "/ls?path=" - + directory + "/"); - - MakeRequestResponse result = makeRequest(url, "Bearer", - clientTokenUser.getOtherTokens().get(0).getAccessToken(), "GET", null); - logger.info("find directory status:" + result.status); - - return result; + /** + * Cache of open download Requests This cache keeps track of the set of files + * selected for transfer out (download) via Globus. It is a means of + * transferring the list from the DatasetPage, where it is generated via user UI + * actions, and the Datasets/globusDownloadParameters API. + * + * Nominally, the dataverse-globus app will call that API endpoint and then + * /requestGlobusDownload, at which point the cached info is sent to the app. If + * the app doesn't call within 5 minutes (the time allowed to call + * /globusDownloadParameters) + GLOBUS_CACHE_MAXAGE minutes (a ~longer period + * giving the user time to make choices in the app), the cached info is deleted. + * + */ + private final Cache downloadCache = Caffeine.newBuilder() + .expireAfterWrite( + Duration.of(JvmSettings.GLOBUS_CACHE_MAXAGE.lookup(Integer.class) + 5, ChronoUnit.MINUTES)) + .scheduler(Scheduler.systemScheduler()).evictionListener((downloadId, datasetId, cause) -> { + // Delete downloads that expire + logger.fine("Download for " + downloadId + " expired"); + }) + + .build(); + + public JsonObject getFilesForDownload(String downloadId) { + return downloadCache.getIfPresent(downloadId); } - public boolean giveGlobusPublicPermissions(String datasetId) - throws UnsupportedEncodingException, MalformedURLException { - - String globusEndpoint = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusEndpoint, ""); - String globusBasicToken = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - if (globusEndpoint.equals("") || globusBasicToken.equals("")) { - return false; - } - AccessToken clientTokenUser = getClientToken(); - if (clientTokenUser == null) { - logger.severe("Cannot get client token "); - return false; - } - - String directory = getDirectory(datasetId); - logger.info(directory); + public int setPermissionForDownload(Dataset dataset, String principal) { + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + String principalType = "identity"; - MakeRequestResponse status = findDirectory(directory, clientTokenUser, globusEndpoint); - - if (status.status == 200) { - - /* - * FilesList fl = parseJson(status.jsonResponse, FilesList.class, false); - * ArrayList files = fl.getDATA(); if (files != null) { for (FileG file: - * files) { if (!file.getName().contains("cached") && - * !file.getName().contains(".thumb")) { int perStatus = - * givePermission("all_authenticated_users", "", "r", clientTokenUser, directory - * + "/" + file.getName(), 
globusEndpoint); logger.info("givePermission status " - * + perStatus + " for " + file.getName()); if (perStatus == 409) { - * logger.info("Permissions already exist or limit was reached for " + - * file.getName()); } else if (perStatus == 400) { - * logger.info("No file in Globus " + file.getName()); } else if (perStatus != - * 201) { logger.info("Cannot get permission for " + file.getName()); } } } } - */ - - int perStatus = givePermission("all_authenticated_users", "", "r", clientTokenUser, directory, - globusEndpoint); - logger.info("givePermission status " + perStatus); - if (perStatus == 409) { - logger.info("Permissions already exist or limit was reached"); - } else if (perStatus == 400) { - logger.info("No directory in Globus"); - } else if (perStatus != 201 && perStatus != 200) { - logger.info("Cannot give read permission"); - return false; - } - - } else if (status.status == 404) { - logger.info("There is no globus directory"); - } else { - logger.severe("Cannot find directory in globus, status " + status); - return false; - } + Permissions permissions = new Permissions(); + permissions.setDATA_TYPE("access"); + permissions.setPrincipalType(principalType); + permissions.setPrincipal(principal); + permissions.setPath(endpoint.getBasePath() + "/"); + permissions.setPermissions("r"); - return true; + return requestPermission(endpoint, dataset, permissions); } - // Generates the URL to launch the Globus app + // Generates the URL to launch the Globus app for upload public String getGlobusAppUrlForDataset(Dataset d) { return getGlobusAppUrlForDataset(d, true, null); } - public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) { + /** + * Generated the App URl for upload (in) or download (out) + * + * @param d - the dataset involved + * @param upload - boolean, true for upload, false for download + * @param dataFiles - a list of the DataFiles to be downloaded + * @return + */ + public String getGlobusAppUrlForDataset(Dataset d, boolean upload, List dataFiles) { String localeCode = session.getLocaleCode(); ApiToken apiToken = null; User user = session.getUser(); @@ -518,46 +587,53 @@ public String getGlobusAppUrlForDataset(Dataset d, boolean upload, DataFile df) apiToken = authSvc.generateApiTokenForUser((AuthenticatedUser) user); } } - String storePrefix = ""; String driverId = d.getEffectiveStorageDriverId(); try { - storePrefix = DataAccess.getDriverPrefix(driverId); } catch (Exception e) { logger.warning("GlobusAppUrlForDataset: Failed to get storePrefix for " + driverId); } - //Use URLTokenUtil for params currently in common with external tools. - URLTokenUtil tokenUtil = new URLTokenUtil(d, df, apiToken, localeCode); - String appUrl; + + // Use URLTokenUtil for params currently in common with external tools. 
+ URLTokenUtil tokenUtil = new URLTokenUtil(d, null, apiToken, localeCode); + String appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost"); + String callback = null; if (upload) { - appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") - + "/upload?datasetPid={datasetPid}&siteUrl={siteUrl}&apiToken={apiToken}&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; + appUrl = appUrl + "/upload?dvLocale={localeCode}"; + callback = SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + d.getId() + + "/globusUploadParameters?locale=" + localeCode; } else { - if (df == null) { - appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") - + "/download?datasetPid={datasetPid}&siteUrl={siteUrl}" - + ((apiToken != null) ? "&apiToken={apiToken}" : "") - + "&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}"; - } else { - String rawStorageId = df.getStorageIdentifier(); - rawStorageId=rawStorageId.substring(rawStorageId.lastIndexOf(":")+1); - appUrl = settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusAppUrl, "http://localhost") - + "/download-file?datasetPid={datasetPid}&siteUrl={siteUrl}" - + ((apiToken != null) ? "&apiToken={apiToken}" : "") - + "&datasetId={datasetId}&datasetVersion={datasetVersion}&dvLocale={localeCode}&fileId={fileId}&storageIdentifier=" - + rawStorageId + "&fileName=" + df.getCurrentName(); - } + // Download + JsonObject files = GlobusUtil.getFilesMap(dataFiles, d); + + String downloadId = UUID.randomUUID().toString(); + downloadCache.put(downloadId, files); + appUrl = appUrl + "/download?dvLocale={localeCode}"; + callback = SystemConfig.getDataverseSiteUrlStatic() + "/api/v1/datasets/" + d.getId() + + "/globusDownloadParameters?locale=" + localeCode + "&downloadId=" + downloadId; + + } + if (apiToken != null) { + callback = UrlSignerUtil.signUrl(callback, 5, apiToken.getAuthenticatedUser().getUserIdentifier(), + HttpMethod.GET, + JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + apiToken.getTokenString()); + } else { + // Shouldn't happen + logger.warning("Unable to get api token for user: " + user.getIdentifier()); } - return tokenUtil.replaceTokensWithValues(appUrl) + "&storePrefix=" + storePrefix; + appUrl = appUrl + "&callback=" + Base64.getEncoder().encodeToString(StringUtils.getBytesUtf8(callback)); + + String finalUrl = tokenUtil.replaceTokensWithValues(appUrl); + logger.fine("Calling app: " + finalUrl); + return finalUrl; } - public String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken) { - return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, null)); - + private String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List downloadDFList) { + return URLTokenUtil.getScriptForUrl(getGlobusAppUrlForDataset(dataset, false, downloadDFList)); } - + @Asynchronous @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void globusUpload(String jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, + public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, AuthenticatedUser authUser) throws ExecutionException, InterruptedException, MalformedURLException { Integer countAll = 0; @@ -566,7 +642,7 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin String logTimestamp = logFormatter.format(new Date()); Logger globusLogger = Logger.getLogger( 
"edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimestamp); - String logFileName = "../logs" + File.separator + "globusUpload_id_" + dataset.getId() + "_" + logTimestamp + String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "globusUpload_id_" + dataset.getId() + "_" + logTimestamp + ".log"; FileHandler fileHandler; boolean fileHandlerSuceeded; @@ -585,41 +661,34 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin globusLogger = logger; } - globusLogger.info("Starting an globusUpload "); - - String datasetIdentifier = dataset.getStorageIdentifier(); + logger.fine("json: " + JsonUtil.prettyPrint(jsonData)); - // ToDo - use DataAccess methods? - String storageType = datasetIdentifier.substring(0, datasetIdentifier.indexOf("://") + 3); - datasetIdentifier = datasetIdentifier.substring(datasetIdentifier.indexOf("://") + 3); - - Thread.sleep(5000); - - JsonObject jsonObject = null; - try { - jsonObject = JsonUtil.getJsonObject(jsonData); - } catch (Exception jpe) { - jpe.printStackTrace(); - logger.log(Level.SEVERE, "Error parsing dataset json. Json: {0}", jsonData); - // TODO: I think an (parsing) exception should stop the process, shouldn't it? - } - logger.log(Level.INFO, "json: {0}", JsonUtil.prettyPrint(jsonObject)); - - String taskIdentifier = jsonObject.getString("taskIdentifier"); + String taskIdentifier = jsonData.getString("taskIdentifier"); - String ruleId = ""; - try { - ruleId = jsonObject.getString("ruleId"); - } catch (NullPointerException npe) { - logger.warning("NPE for jsonData object"); + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); + String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + logger.fine("Found rule: " + ruleId); + if (ruleId != null) { + Long datasetId = rulesCache.getIfPresent(ruleId); + if (datasetId != null) { + // Will not delete rule + rulesCache.invalidate(ruleId); + } } + // Wait before first check + Thread.sleep(5000); // globus task status check - GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); + task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); - if (ruleId.length() > 0) { - deletePermision(ruleId, globusLogger); + globusLogger.info("Starting a globusUpload "); + + if (ruleId != null) { + // Transfer is complete, so delete rule + deletePermission(ruleId, dataset, globusLogger); + } // If success, switch to an EditInProgress lock - do this before removing the @@ -661,21 +730,30 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin // List inputList = new ArrayList(); - JsonArray filesJsonArray = jsonObject.getJsonArray("files"); + JsonArray filesJsonArray = jsonData.getJsonArray("files"); if (filesJsonArray != null) { + String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" + + dataset.getIdentifierForFileStorage(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from // externalTool String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String[] bits = storageIdentifier.split(":"); - String bucketName = bits[1].replace("/", ""); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + String storeId = parts[0]; + // If this is an S3 store, we need to split out the 
bucket name + String[] bits = parts[1].split(":"); + String bucketName = ""; + if (bits.length > 1) { + bucketName = bits[0]; + } String fileId = bits[bits.length - 1]; // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - String fullPath = storageType + bucketName + "/" + datasetIdentifier + "/" + fileId; + // or globus:///10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + String fullPath = storeId + "://" + bucketName + "/" + datasetIdentifier + "/" + fileId; String fileName = fileJsonObject.getString("fileName"); inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); @@ -684,7 +762,8 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin // calculateMissingMetadataFields: checksum, mimetype JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - + logger.fine("Size: " + newfilesJsonArray.size()); + logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { @@ -692,29 +771,33 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin countAll++; String storageIdentifier = fileJsonObject.getString("storageIdentifier"); String fileName = fileJsonObject.getString("fileName"); - String directoryLabel = fileJsonObject.getString("directoryLabel"); - String[] bits = storageIdentifier.split(":"); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + // If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + if (bits.length > 1) { + } String fileId = bits[bits.length - 1]; List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) .mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId)) .filter(Objects::nonNull).collect(Collectors.toList()); - if (newfileJsonObject != null) { - if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { - JsonPatch path = Json.createPatchBuilder() - .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); - fileJsonObject = path.apply(fileJsonObject); - path = Json.createPatchBuilder() - .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); - fileJsonObject = path.apply(fileJsonObject); - jsonDataSecondAPI.add(fileJsonObject); - countSuccess++; - } else { - globusLogger.info(fileName - + " will be skipped from adding to dataset by second API due to missing values "); - countError++; - } + logger.fine("List Size: " + newfileJsonObject.size()); + // if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { + JsonPatch path = Json.createPatchBuilder() + .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); + fileJsonObject = path.apply(fileJsonObject); + path = Json.createPatchBuilder() + .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); + fileJsonObject = path.apply(fileJsonObject); + jsonDataSecondAPI.add(fileJsonObject); + countSuccess++; + // } else { + // globusLogger.info(fileName + // + " will be skipped from adding to dataset by second API due to missing + // values "); + // countError++; + // } } else { globusLogger.info(fileName + " will be skipped from adding to dataset by second API due to missing values "); @@ -731,6 +814,9 @@ public void globusUpload(String jsonData, ApiToken token, 
Dataset dataset, Strin + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; System.out.println("*******====command ==== " + command); + // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of + // calling API + String output = addFilesAsync(command, globusLogger); if (output.equalsIgnoreCase("ok")) { // if(!taskSkippedFiles) @@ -757,10 +843,6 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin globusLogger.info("Files failures: " + countError.toString()); globusLogger.info("Finished upload via Globus job."); - if (fileHandlerSuceeded) { - fileHandler.close(); - } - } catch (Exception e) { logger.info("Exception from globusUpload call "); e.printStackTrace(); @@ -768,6 +850,13 @@ public void globusUpload(String jsonData, ApiToken token, Dataset dataset, Strin datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); } } + if (ruleId != null) { + deletePermission(ruleId, dataset, globusLogger); + globusLogger.info("Removed upload permission: " + ruleId); + } + if (fileHandlerSuceeded) { + fileHandler.close(); + } } public String addFilesAsync(String curlCommand, Logger globusLogger) @@ -809,18 +898,16 @@ private String addFiles(String curlCommand, Logger globusLogger) { sb.append(line); globusLogger.info(" API Output : " + sb.toString()); JsonObject jsonObject = null; - try { - jsonObject = JsonUtil.getJsonObject(sb.toString()); - } catch (Exception jpe) { - jpe.printStackTrace(); - globusLogger.log(Level.SEVERE, "Error parsing dataset json."); - // TODO: a parsing exception should cause the process to stop. - } + jsonObject = JsonUtil.getJsonObject(sb.toString()); status = jsonObject.getString("status"); } catch (Exception ex) { - globusLogger.log(Level.SEVERE, - "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); + if (ex instanceof JsonParsingException) { + globusLogger.log(Level.SEVERE, "Error parsing dataset json."); + } else { + globusLogger.log(Level.SEVERE, + "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); + } } return status; @@ -833,7 +920,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro Logger globusLogger = Logger.getLogger( "edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusDownload" + logTimestamp); - String logFileName = "../logs" + File.separator + "globusDownload_id_" + dataset.getId() + "_" + logTimestamp + String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "globusDownload_id_" + dataset.getId() + "_" + logTimestamp + ".log"; FileHandler fileHandler; boolean fileHandlerSuceeded; @@ -852,7 +939,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro globusLogger = logger; } - globusLogger.info("Starting an globusDownload "); + globusLogger.info("Starting a globusDownload "); JsonObject jsonObject = null; try { @@ -864,29 +951,48 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro } String taskIdentifier = jsonObject.getString("taskIdentifier"); - String ruleId = ""; - try { - jsonObject.getString("ruleId"); - } catch (NullPointerException npe) { - - } + GlobusEndpoint endpoint = getGlobusEndpoint(dataset); + logger.fine("Endpoint path: " + endpoint.getBasePath()); + // If the rules_cache times out, the permission will be deleted. 
Presumably that + // doesn't affect a // globus task status check - GlobusTask task = globusStatusCheck(taskIdentifier, globusLogger); + GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); + String ruleId = getRuleId(endpoint, task.getOwner_id(), "r"); + if (ruleId != null) { + logger.fine("Found rule: " + ruleId); + Long datasetId = rulesCache.getIfPresent(ruleId); + if (datasetId != null) { + logger.fine("Deleting from cache: rule: " + ruleId); + // Will not delete rule + rulesCache.invalidate(ruleId); + } + } else { + // Something is wrong - the rule should be there (a race with the cache timing + // out?) + logger.warning("ruleId not found for taskId: " + taskIdentifier); + } + task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); String taskStatus = getTaskStatus(task); - if (ruleId.length() > 0) { - deletePermision(ruleId, globusLogger); + // Transfer is done (success or failure) so delete the rule + if (ruleId != null) { + logger.fine("Deleting: rule: " + ruleId); + deletePermission(ruleId, dataset, globusLogger); } if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { String comment = "Reason : " + taskStatus.split("#")[1] + "
    Short Description : " + taskStatus.split("#")[2]; - userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), - UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, dataset.getId(), comment, true); - globusLogger.info("Globus task failed during download process"); - } else { + if (authUser != null && authUser instanceof AuthenticatedUser) { + userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), + UserNotification.Type.GLOBUSDOWNLOADCOMPLETEDWITHERRORS, dataset.getId(), comment, true); + } + + globusLogger.info("Globus task failed during download process: "+comment); + } else if (authUser != null && authUser instanceof AuthenticatedUser) { + boolean taskSkippedFiles = (task.getSkip_source_errors() == null) ? false : task.getSkip_source_errors(); if (!taskSkippedFiles) { userNotificationService.sendNotification((AuthenticatedUser) authUser, @@ -902,18 +1008,18 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro Executor executor = Executors.newFixedThreadPool(10); - private GlobusTask globusStatusCheck(String taskId, Logger globusLogger) throws MalformedURLException { + private GlobusTask globusStatusCheck(GlobusEndpoint endpoint, String taskId, Logger globusLogger) + throws MalformedURLException { boolean taskCompletion = false; String status = ""; GlobusTask task = null; - int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault(settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 50); + int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( + settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 50); do { try { globusLogger.info("checking globus transfer task " + taskId); Thread.sleep(pollingInterval * 1000); - AccessToken clientTokenUser = getClientToken(); - // success = globusServiceBean.getSuccessfulTransfers(clientTokenUser, taskId); - task = getTask(clientTokenUser, taskId, globusLogger); + task = getTask(endpoint.getClientToken(), taskId, globusLogger); if (task != null) { status = task.getStatus(); if (status != null) { @@ -956,7 +1062,7 @@ private String getTaskStatus(GlobusTask task) { if (task != null) { status = task.getStatus(); if (status != null) { - // The task is in progress. 
+ // The task is in progress but is not ok or queued if (status.equalsIgnoreCase("ACTIVE")) { status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); } else { @@ -986,7 +1092,7 @@ public JsonObject calculateMissingMetadataFields(List inputList, Logger .collect(Collectors.toList()); }); - CompletableFuture completableFuture = allCompletableFuture.thenApply(files -> { + CompletableFuture completableFuture = allCompletableFuture.thenApply(files -> { return files.stream().map(d -> json(d)).collect(toJsonArray()); }); @@ -999,7 +1105,6 @@ public JsonObject calculateMissingMetadataFields(List inputList, Logger } private CompletableFuture calculateDetailsAsync(String id, Logger globusLogger) { - // logger.info(" calcualte additional details for these globus id ==== " + id); return CompletableFuture.supplyAsync(() -> { try { @@ -1027,7 +1132,7 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) String fullPath = id.split("IDsplit")[1]; String fileName = id.split("IDsplit")[2]; - // ToDo: what if the file doesnot exists in s3 + // ToDo: what if the file does not exist in s3 // ToDo: what if checksum calculation failed do { @@ -1038,9 +1143,9 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) count = 3; } catch (IOException ioex) { count = 3; - logger.info(ioex.getMessage()); - globusLogger.info("S3AccessIO: DataFile (fullPAth " + fullPath - + ") does not appear to be an S3 object associated with driver: "); + logger.fine(ioex.getMessage()); + globusLogger.info( + "DataFile (fullPath " + fullPath + ") does not appear to be accessible within Dataverse: "); } catch (Exception ex) { count = count + 1; ex.printStackTrace(); @@ -1051,7 +1156,7 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) } while (count < 3); if (checksumVal.length() == 0) { - checksumVal = "NULL"; + checksumVal = "Not available in Dataverse"; } String mimeType = calculatemime(fileName); @@ -1067,7 +1172,7 @@ public String calculatemime(String fileName) throws InterruptedException { String finalType = FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; String type = FileUtil.determineFileTypeByNameAndExtension(fileName); - if (type!=null && !type.isBlank()) { + if (type != null && !type.isBlank()) { if (FileUtil.useRecognizedType(finalType, type)) { finalType = type; } @@ -1075,194 +1180,106 @@ public String calculatemime(String fileName) throws InterruptedException { return finalType; } - /* - * public boolean globusFinishTransfer(Dataset dataset, AuthenticatedUser user) - * throws MalformedURLException { - * - * logger.info("=====Tasklist == dataset id :" + dataset.getId()); String - * directory = null; - * - * try { - * - * List fileMetadatas = new ArrayList<>(); - * - * StorageIO datasetSIO = DataAccess.getStorageIO(dataset); - * - * - * - * DatasetVersion workingVersion = dataset.getEditVersion(); - * - * if (workingVersion.getCreateTime() != null) { - * workingVersion.setCreateTime(new Timestamp(new Date().getTime())); } - * - * directory = dataset.getAuthorityForFileStorage() + "/" + - * dataset.getIdentifierForFileStorage(); - * - * System.out.println("======= directory ==== " + directory + - * " ==== datasetId :" + dataset.getId()); Map checksumMapOld - * = new HashMap<>(); - * - * Iterator fmIt = workingVersion.getFileMetadatas().iterator(); - * - * while (fmIt.hasNext()) { FileMetadata fm = fmIt.next(); if (fm.getDataFile() - * != null && fm.getDataFile().getId() != null) { String chksum = - * 
fm.getDataFile().getChecksumValue(); if (chksum != null) { - * checksumMapOld.put(chksum, 1); } } } - * - * List dFileList = new ArrayList<>(); boolean update = false; for - * (S3ObjectSummary s3ObjectSummary : datasetSIO.listAuxObjects("")) { - * - * String s3ObjectKey = s3ObjectSummary.getKey(); - * - * - * String t = s3ObjectKey.replace(directory, ""); - * - * if (t.indexOf(".") > 0) { long totalSize = s3ObjectSummary.getSize(); String - * filePath = s3ObjectKey; String fileName = - * filePath.split("/")[filePath.split("/").length - 1]; String fullPath = - * datasetSIO.getStorageLocation() + "/" + fileName; - * - * logger.info("Full path " + fullPath); StorageIO dataFileStorageIO = - * DataAccess.getDirectStorageIO(fullPath); InputStream in = - * dataFileStorageIO.getInputStream(); - * - * String checksumVal = FileUtil.calculateChecksum(in, - * DataFile.ChecksumType.MD5); //String checksumVal = s3ObjectSummary.getETag(); - * logger.info("The checksum is " + checksumVal); if - * ((checksumMapOld.get(checksumVal) != null)) { logger.info("datasetId :" + - * dataset.getId() + "======= filename ==== " + filePath + - * " == file already exists "); } else if (filePath.contains("cached") || - * filePath.contains(".thumb")) { logger.info(filePath + " is ignored"); } else - * { update = true; logger.info("datasetId :" + dataset.getId() + - * "======= filename ==== " + filePath + " == new file "); try { - * - * DataFile datafile = new DataFile(DataFileServiceBean.MIME_TYPE_GLOBUS_FILE); - * //MIME_TYPE_GLOBUS datafile.setModificationTime(new Timestamp(new - * Date().getTime())); datafile.setCreateDate(new Timestamp(new - * Date().getTime())); datafile.setPermissionModificationTime(new Timestamp(new - * Date().getTime())); - * - * FileMetadata fmd = new FileMetadata(); - * - * - * fmd.setLabel(fileName); fmd.setDirectoryLabel(filePath.replace(directory, - * "").replace(File.separator + fileName, "")); - * - * fmd.setDataFile(datafile); - * - * datafile.getFileMetadatas().add(fmd); - * - * FileUtil.generateS3PackageStorageIdentifierForGlobus(datafile); - * logger.info("==== datasetId :" + dataset.getId() + "======= filename ==== " - * + filePath + " == added to datafile, filemetadata "); - * - * try { // We persist "SHA1" rather than "SHA-1". - * //datafile.setChecksumType(DataFile.ChecksumType.SHA1); - * datafile.setChecksumType(DataFile.ChecksumType.MD5); - * datafile.setChecksumValue(checksumVal); } catch (Exception cksumEx) { - * logger.info("==== datasetId :" + dataset.getId() + - * "======Could not calculate checksumType signature for the new file "); } - * - * datafile.setFilesize(totalSize); - * - * dFileList.add(datafile); - * - * } catch (Exception ioex) { logger.info("datasetId :" + dataset.getId() + - * "======Failed to process and/or save the file " + ioex.getMessage()); return - * false; - * - * } } } } if (update) { - * - * List filesAdded = new ArrayList<>(); - * - * if (dFileList != null && dFileList.size() > 0) { - * - * // Dataset dataset = version.getDataset(); - * - * for (DataFile dataFile : dFileList) { - * - * if (dataFile.getOwner() == null) { dataFile.setOwner(dataset); - * - * workingVersion.getFileMetadatas().add(dataFile.getFileMetadata()); - * dataFile.getFileMetadata().setDatasetVersion(workingVersion); - * dataset.getFiles().add(dataFile); - * - * } - * - * filesAdded.add(dataFile); - * - * } - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ===== Done! 
Finished saving new files to the dataset."); } - * - * fileMetadatas.clear(); for (DataFile addedFile : filesAdded) { - * fileMetadatas.add(addedFile.getFileMetadata()); } filesAdded = null; - * - * if (workingVersion.isDraft()) { - * - * logger.info("Async: ==== datasetId :" + dataset.getId() + - * " ==== inside draft version "); - * - * Timestamp updateTime = new Timestamp(new Date().getTime()); - * - * workingVersion.setLastUpdateTime(updateTime); - * dataset.setModificationTime(updateTime); - * - * - * for (FileMetadata fileMetadata : fileMetadatas) { - * - * if (fileMetadata.getDataFile().getCreateDate() == null) { - * fileMetadata.getDataFile().setCreateDate(updateTime); - * fileMetadata.getDataFile().setCreator((AuthenticatedUser) user); } - * fileMetadata.getDataFile().setModificationTime(updateTime); } - * - * - * } else { logger.info("datasetId :" + dataset.getId() + - * " ==== inside released version "); - * - * for (int i = 0; i < workingVersion.getFileMetadatas().size(); i++) { for - * (FileMetadata fileMetadata : fileMetadatas) { if - * (fileMetadata.getDataFile().getStorageIdentifier() != null) { - * - * if (fileMetadata.getDataFile().getStorageIdentifier().equals(workingVersion. - * getFileMetadatas().get(i).getDataFile().getStorageIdentifier())) { - * workingVersion.getFileMetadatas().set(i, fileMetadata); } } } } - * - * - * } - * - * - * try { Command cmd; logger.info("Async: ==== datasetId :" + - * dataset.getId() + - * " ======= UpdateDatasetVersionCommand START in globus function "); cmd = new - * UpdateDatasetVersionCommand(dataset, new DataverseRequest(user, - * (HttpServletRequest) null)); ((UpdateDatasetVersionCommand) - * cmd).setValidateLenient(true); //new DataverseRequest(authenticatedUser, - * (HttpServletRequest) null) //dvRequestService.getDataverseRequest() - * commandEngine.submit(cmd); } catch (CommandException ex) { - * logger.log(Level.WARNING, "==== datasetId :" + dataset.getId() + - * "======CommandException updating DatasetVersion from batch job: " + - * ex.getMessage()); return false; } - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ======= GLOBUS CALL COMPLETED SUCCESSFULLY "); - * - * //return true; } - * - * } catch (Exception e) { String message = e.getMessage(); - * - * logger.info("==== datasetId :" + dataset.getId() + - * " ======= GLOBUS CALL Exception ============== " + message); - * e.printStackTrace(); return false; //return - * error(Response.Status.INTERNAL_SERVER_ERROR, - * "Uploaded files have passed checksum validation but something went wrong while attempting to move the files into Dataverse. 
Message was '" - * + message + "'."); } - * - * String globusBasicToken = - * settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusBasicToken, ""); - * AccessToken clientTokenUser = getClientToken(globusBasicToken); - * updatePermision(clientTokenUser, directory, "identity", "r"); return true; } - * - */ + + private GlobusEndpoint getGlobusEndpoint(DvObject dvObject) { + Dataset dataset = null; + if (dvObject instanceof Dataset) { + dataset = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + dataset = (Dataset) dvObject.getOwner(); + } else { + throw new IllegalArgumentException("Unsupported DvObject type: " + dvObject.getClass().getName()); + } + String driverId = dataset.getEffectiveStorageDriverId(); + GlobusEndpoint endpoint = null; + + String directoryPath = GlobusAccessibleStore.getTransferPath(driverId); + + if (GlobusAccessibleStore.isDataverseManaged(driverId) && (dataset != null)) { + directoryPath = directoryPath + "/" + dataset.getAuthorityForFileStorage() + "/" + + dataset.getIdentifierForFileStorage(); + } else { + // remote store - may have path in file storageidentifier + String relPath = dvObject.getStorageIdentifier() + .substring(dvObject.getStorageIdentifier().lastIndexOf("//") + 2); + int filenameStart = relPath.lastIndexOf("/") + 1; + if (filenameStart > 0) { + directoryPath = directoryPath + relPath.substring(0, filenameStart); + } + } + logger.fine("directoryPath finally: " + directoryPath); + + String endpointId = GlobusAccessibleStore.getTransferEndpointId(driverId); + + logger.fine("endpointId: " + endpointId); + + String globusToken = GlobusAccessibleStore.getGlobusToken(driverId); + + AccessToken accessToken = GlobusServiceBean.getClientToken(globusToken); + String clientToken = accessToken.getOtherTokens().get(0).getAccessToken(); + endpoint = new GlobusEndpoint(endpointId, clientToken, directoryPath); + + return endpoint; + } + + // This helper method is called from the Download terms/guestbook/etc. popup, + // when the user clicks the "ok" button. We use it, instead of calling + // downloadServiceBean directly, in order to differentiate between single + // file downloads and multiple (batch) downloads - since both use the same + // terms/etc. popup. 
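// Editorial aside, not part of this patch: for context on how the helper
// described in the comment above might be wired up, a hypothetical JSF
// backing-bean action behind the popup's "ok" button could call it roughly
// like so (bean and field names here are illustrative only):
//
//     public void startGlobusTransfer() {
//         // second argument false: the guestbook response *should* be saved
//         globusService.writeGuestbookAndStartTransfer(guestbookResponse, false);
//     }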
+ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, + boolean doNotSaveGuestbookResponse) { + PrimeFaces.current().executeScript("PF('guestbookAndTermsPopup').hide()"); + guestbookResponse.setEventType(GuestbookResponse.DOWNLOAD); + + ApiToken apiToken = null; + User user = session.getUser(); + if (user instanceof AuthenticatedUser) { + apiToken = authSvc.findApiTokenByUser((AuthenticatedUser) user); + } else if (user instanceof PrivateUrlUser) { + PrivateUrlUser privateUrlUser = (PrivateUrlUser) user; + PrivateUrl privUrl = privateUrlService.getPrivateUrlFromDatasetId(privateUrlUser.getDatasetId()); + apiToken = new ApiToken(); + apiToken.setTokenString(privUrl.getToken()); + } + + DataFile df = guestbookResponse.getDataFile(); + if (df != null) { + logger.fine("Single datafile case for writeGuestbookAndStartTransfer"); + List downloadDFList = new ArrayList(1); + downloadDFList.add(df); + if (!doNotSaveGuestbookResponse) { + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + } + PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, downloadDFList)); + } else { + // Following FileDownloadServiceBean writeGuestbookAndStartBatchDownload + List list = new ArrayList<>(Arrays.asList(guestbookResponse.getSelectedFileIds().split(","))); + List selectedFiles = new ArrayList(); + for (String idAsString : list) { + try { + Long fileId = Long.parseLong(idAsString); + // If we need to create a GuestBookResponse record, we have to + // look up the DataFile object for this file: + df = dataFileService.findCheapAndEasy(fileId); + selectedFiles.add(df); + if (!doNotSaveGuestbookResponse) { + guestbookResponse.setDataFile(df); + fileDownloadService.writeGuestbookResponseRecord(guestbookResponse); + } + } catch (NumberFormatException nfe) { + logger.warning( + "A file id passed to the writeGuestbookAndStartTransfer method as a string could not be converted back to Long: " + + idAsString); + return; + } + + } + if (!selectedFiles.isEmpty()) { + // Use dataset from one file - files should all be from the same dataset + PrimeFaces.current().executeScript(getGlobusDownloadScript(df.getOwner(), apiToken, selectedFiles)); + } + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java new file mode 100644 index 00000000000..92cf8ac7704 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java @@ -0,0 +1,33 @@ +package edu.harvard.iq.dataverse.globus; + +import java.util.List; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; + +public class GlobusUtil { + + public static JsonObject getFilesMap(List dataFiles, Dataset d) { + JsonObjectBuilder filesBuilder = Json.createObjectBuilder(); + for (DataFile df : dataFiles) { + String storageId = df.getStorageIdentifier(); + String[] parts = DataAccess + .getDriverIdAndStorageLocation(DataAccess.getLocationFromStorageId(storageId, d)); + String driverId = parts[0]; + String fileLocation = parts[1]; + if (GlobusAccessibleStore.isDataverseManaged(driverId)) { + String endpointWithBasePath = GlobusAccessibleStore.getTransferEnpointWithPath(driverId); + fileLocation = endpointWithBasePath + "/" + fileLocation; + } else { + 
fileLocation = storageId.substring(storageId.lastIndexOf("//") + 2); + } + filesBuilder.add(df.getId().toString(), fileLocation); + } + return filesBuilder.build(); + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java index 20884e3360c..e0b5c2dfbfb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -88,7 +88,6 @@ public class HarvesterServiceBean { public static final String HARVEST_RESULT_FAILED="failed"; public static final String DATAVERSE_PROPRIETARY_METADATA_FORMAT="dataverse_json"; public static final String DATAVERSE_PROPRIETARY_METADATA_API="/api/datasets/export?exporter="+DATAVERSE_PROPRIETARY_METADATA_FORMAT+"&persistentId="; - public static final String DATAVERSE_HARVEST_STOP_FILE="../logs/stopharvest_"; public HarvesterServiceBean() { @@ -399,7 +398,7 @@ private void deleteHarvestedDatasetIfExists(String persistentIdentifier, Dataver private boolean checkIfStoppingJob(HarvestingClient harvestingClient) { Long pid = ProcessHandle.current().pid(); - String stopFileName = DATAVERSE_HARVEST_STOP_FILE + harvestingClient.getName() + "." + pid; + String stopFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "stopharvest_" + harvestingClient.getName() + "." + pid; Path stopFilePath = Paths.get(stopFileName); if (Files.exists(stopFilePath)) { diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index 40db55f2a0c..0667f5594ce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -243,6 +243,14 @@ public String getCustomHttpHeaders() { public void setCustomHttpHeaders(String customHttpHeaders) { this.customHttpHeaders = customHttpHeaders; } + + private boolean allowHarvestingMissingCVV; + public boolean getAllowHarvestingMissingCVV() { + return allowHarvestingMissingCVV; + } + public void setAllowHarvestingMissingCVV(boolean allowHarvestingMissingCVV) { + this.allowHarvestingMissingCVV = allowHarvestingMissingCVV; + } // TODO: do we need "orphanRemoval=true"? -- L.A. 4.4 // TODO: should it be @OrderBy("startTime")? -- L.A. 
4.4 diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java index 1b4a7bc7db0..cc15d4c978b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAIRecordServiceBean.java @@ -40,10 +40,6 @@ @Stateless @Named public class OAIRecordServiceBean implements java.io.Serializable { - @EJB - OAISetServiceBean oaiSetService; - @EJB - IndexServiceBean indexService; @EJB DatasetServiceBean datasetService; @EJB @@ -55,13 +51,24 @@ public class OAIRecordServiceBean implements java.io.Serializable { EntityManager em; private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean"); - - public void updateOaiRecords(String setName, List datasetIds, Date updateTime, boolean doExport) { - updateOaiRecords(setName, datasetIds, updateTime, doExport, logger); - } - public void updateOaiRecords(String setName, List datasetIds, Date updateTime, boolean doExport, Logger setUpdateLogger) { - + /** + * Updates the OAI records for the set specified + * @param setName name of the OAI set + * @param datasetIds ids of the datasets that are candidates for this OAI set + * @param updateTime time stamp + * @param doExport attempt to export datasets that haven't been exported yet + * @param confirmed true if the datasetIds above were looked up in the database + * - as opposed to in the search engine. Meaning, that it is + * confirmed that any dataset not on this list that's currently + * in the set is no longer in the database and should be + * marked as deleted without any further checks. Otherwise + * we'll want to double-check if the dataset still exists + * as published. This is to prevent marking existing datasets + * as deleted during a full reindex etc. + * @param setUpdateLogger dedicated Logger + */ + public void updateOaiRecords(String setName, List datasetIds, Date updateTime, boolean doExport, boolean confirmed, Logger setUpdateLogger) { // create Map of OaiRecords List oaiRecords = findOaiRecordsBySetName(setName); Map recordMap = new HashMap<>(); @@ -101,9 +108,6 @@ public void updateOaiRecords(String setName, List datasetIds, Date updateT DatasetVersion releasedVersion = dataset.getReleasedVersion(); Date publicationDate = releasedVersion == null ? null : releasedVersion.getReleaseTime(); - //if (dataset.getPublicationDate() != null - // && (dataset.getLastExportTime() == null - // || dataset.getLastExportTime().before(dataset.getPublicationDate()))) { if (publicationDate != null && (dataset.getLastExportTime() == null || dataset.getLastExportTime().before(publicationDate))) { @@ -125,7 +129,9 @@ public void updateOaiRecords(String setName, List datasetIds, Date updateT } // anything left in the map should be marked as removed! 
- markOaiRecordsAsRemoved( recordMap.values(), updateTime, setUpdateLogger); + markOaiRecordsAsRemoved(recordMap.values(), updateTime, confirmed, setUpdateLogger); + + } @@ -162,7 +168,7 @@ record = new OAIRecord(setName, dataset.getGlobalId().asString(), new Date()); } } - + /* // Updates any existing OAI records for this dataset // Should be called whenever there's a change in the release status of the Dataset // (i.e., when it's published or deaccessioned), so that the timestamps and @@ -201,13 +207,31 @@ public void updateOaiRecordsForDataset(Dataset dataset) { logger.fine("Null returned - no records found."); } } +*/ - public void markOaiRecordsAsRemoved(Collection records, Date updateTime, Logger setUpdateLogger) { + public void markOaiRecordsAsRemoved(Collection records, Date updateTime, boolean confirmed, Logger setUpdateLogger) { for (OAIRecord oaiRecord : records) { if ( !oaiRecord.isRemoved() ) { - setUpdateLogger.fine("marking OAI record "+oaiRecord.getGlobalId()+" as removed"); - oaiRecord.setRemoved(true); - oaiRecord.setLastUpdateTime(updateTime); + boolean confirmedRemoved = confirmed; + if (!confirmedRemoved) { + Dataset lookedUp = datasetService.findByGlobalId(oaiRecord.getGlobalId()); + if (lookedUp == null) { + confirmedRemoved = true; + } else if (lookedUp.getLastExportTime() == null) { + confirmedRemoved = true; + } else { + boolean isReleased = lookedUp.getReleasedVersion() != null; + if (!isReleased) { + confirmedRemoved = true; + } + } + } + + if (confirmedRemoved) { + setUpdateLogger.fine("marking OAI record "+oaiRecord.getGlobalId()+" as removed"); + oaiRecord.setRemoved(true); + oaiRecord.setLastUpdateTime(updateTime); + } } else { setUpdateLogger.fine("OAI record "+oaiRecord.getGlobalId()+" is already marked as removed."); } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java index 2bd666401c7..242187db7f1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/OAISetServiceBean.java @@ -25,6 +25,7 @@ import jakarta.inject.Named; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; +import jakarta.persistence.Query; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.BaseHttpSolrClient.RemoteSolrException; @@ -121,6 +122,25 @@ public List findAllNamedSets() { } } + /** + * "Active" sets are the ones that have been successfully exported, and contain + * a non-zero number of records. (Although a set that contains a number of + * records that are all marked as "deleted" is still an active set!) 
+ * @return list of OAISets + */ + public List findAllActiveNamedSets() { + String jpaQueryString = "select object(o) " + + "from OAISet as o, OAIRecord as r " + + "where r.setName = o.spec " + + "and o.spec != '' " + + "group by o order by o.spec"; + + Query query = em.createQuery(jpaQueryString); + List queryResults = query.getResultList(); + + return queryResults; + } + @Asynchronous public void remove(Long setId) { OAISet oaiSet = find(setId); @@ -151,6 +171,8 @@ public void exportOaiSet(OAISet oaiSet, Logger exportLogger) { String query = managedSet.getDefinition(); List datasetIds; + boolean databaseLookup = false; // As opposed to a search engine lookup + try { if (!oaiSet.isDefaultSet()) { datasetIds = expandSetQuery(query); @@ -161,6 +183,7 @@ public void exportOaiSet(OAISet oaiSet, Logger exportLogger) { // including the unpublished drafts and deaccessioned ones. // Those will be filtered out further down the line. datasetIds = datasetService.findAllLocalDatasetIds(); + databaseLookup = true; } } catch (OaiSetException ose) { datasetIds = null; @@ -171,7 +194,7 @@ public void exportOaiSet(OAISet oaiSet, Logger exportLogger) { // they will be properly marked as "deleted"! -- L.A. 4.5 //if (datasetIds != null && !datasetIds.isEmpty()) { exportLogger.info("Calling OAI Record Service to re-export " + datasetIds.size() + " datasets."); - oaiRecordService.updateOaiRecords(managedSet.getSpec(), datasetIds, new Date(), true, exportLogger); + oaiRecordService.updateOaiRecords(managedSet.getSpec(), datasetIds, new Date(), true, databaseLookup, exportLogger); //} managedSet.setUpdateInProgress(false); @@ -180,7 +203,7 @@ public void exportOaiSet(OAISet oaiSet, Logger exportLogger) { public void exportAllSets() { String logTimestamp = logFormatter.format(new Date()); Logger exportLogger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.OAISetServiceBean." + "UpdateAllSets." 
+ logTimestamp); - String logFileName = "../logs" + File.separator + "oaiSetsUpdate_" + logTimestamp + ".log"; + String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "oaiSetsUpdate_" + logTimestamp + ".log"; FileHandler fileHandler = null; boolean fileHandlerSuceeded = false; try { diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java index 96a19acc0e8..f9047e3ee5f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/web/servlet/OAIServlet.java @@ -5,6 +5,7 @@ */ package edu.harvard.iq.dataverse.harvest.server.web.servlet; +import edu.harvard.iq.dataverse.MailServiceBean; import io.gdcc.xoai.dataprovider.DataProvider; import io.gdcc.xoai.dataprovider.repository.Repository; import io.gdcc.xoai.dataprovider.repository.RepositoryConfiguration; @@ -31,6 +32,7 @@ import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.MailUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import io.gdcc.xoai.exceptions.BadVerbException; import io.gdcc.xoai.exceptions.OAIException; import io.gdcc.xoai.model.oaipmh.Granularity; import io.gdcc.xoai.services.impl.SimpleResumptionTokenFormat; @@ -38,6 +40,7 @@ import java.io.IOException; +import java.util.Optional; import java.util.logging.Logger; import jakarta.ejb.EJB; import jakarta.inject.Inject; @@ -48,6 +51,7 @@ import jakarta.servlet.http.HttpServlet; import jakarta.servlet.http.HttpServletRequest; import jakarta.servlet.http.HttpServletResponse; +import java.util.Map; import javax.xml.stream.XMLStreamException; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.ConfigProvider; @@ -65,14 +69,14 @@ public class OAIServlet extends HttpServlet { @EJB OAIRecordServiceBean recordService; @EJB - SettingsServiceBean settingsService; - @EJB DataverseServiceBean dataverseService; @EJB DatasetServiceBean datasetService; @EJB SystemConfig systemConfig; + @EJB + MailServiceBean mailServiceBean; @Inject @ConfigProperty(name = "dataverse.oai.server.maxidentifiers", defaultValue="100") @@ -192,9 +196,13 @@ private RepositoryConfiguration createRepositoryConfiguration() { // (Note: if the setting does not exist, we are going to assume that they // have a reason not to want to configure their email address, if it is // a developer's instance, for example; or a reason not to want to - // advertise it to the world.) - InternetAddress systemEmailAddress = MailUtil.parseSystemAddress(settingsService.getValueForKey(SettingsServiceBean.Key.SystemEmail)); - String systemEmailLabel = systemEmailAddress != null ? systemEmailAddress.getAddress() : "donotreply@localhost"; + // advertise it to the world.) + String systemEmailLabel = "donotreply@localhost"; + // TODO: should we expose the support team's address if configured? + Optional systemAddress = mailServiceBean.getSystemAddress(); + if (systemAddress.isPresent()) { + systemEmailLabel = systemAddress.get().getAddress(); + } RepositoryConfiguration configuration = new RepositoryConfiguration.RepositoryConfigurationBuilder() .withAdminEmail(systemEmailLabel) @@ -256,10 +264,16 @@ private void processRequest(HttpServletRequest httpServletRequest, HttpServletRe "Sorry. 
OAI Service is disabled on this Dataverse node."); return; } - - RawRequest rawRequest = RequestBuilder.buildRawRequest(httpServletRequest.getParameterMap()); - - OAIPMH handle = dataProvider.handle(rawRequest); + + Map params = httpServletRequest.getParameterMap(); + OAIPMH handle; + try { + RawRequest rawRequest = RequestBuilder.buildRawRequest(params); + handle = dataProvider.handle(rawRequest); + } catch (BadVerbException bve) { + handle = dataProvider.handle(params); + } + response.setContentType("text/xml;charset=UTF-8"); try (XmlWriter xmlWriter = new XmlWriter(response.getOutputStream(), repositoryConfiguration);) { diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java index b4e275b6059..1e713b08adb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiSetRepository.java @@ -35,7 +35,7 @@ public void setSetService(OAISetServiceBean setService) { @Override public boolean supportSets() { - List dataverseOAISets = setService.findAllNamedSets(); + List dataverseOAISets = setService.findAllActiveNamedSets(); if (dataverseOAISets == null || dataverseOAISets.isEmpty()) { return false; @@ -46,7 +46,7 @@ public boolean supportSets() { @Override public List getSets() { logger.fine("calling retrieveSets()"); - List dataverseOAISets = setService.findAllNamedSets(); + List dataverseOAISets = setService.findAllActiveNamedSets(); List XOAISets = new ArrayList(); if (dataverseOAISets != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 40dc3d6fdd6..9bacafd173f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -48,6 +48,8 @@ import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; import edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator; +import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; +import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import edu.harvard.iq.dataverse.datavariable.SummaryStatistic; import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.ingest.metadataextraction.FileMetadataExtractor; @@ -70,7 +72,10 @@ import edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.por.PORFileReader; import edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.por.PORFileReaderSpi; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; import edu.harvard.iq.dataverse.util.*; +import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; import org.apache.commons.io.IOUtils; //import edu.harvard.iq.dvn.unf.*; @@ -121,6 +126,7 @@ import jakarta.jms.Message; import jakarta.faces.application.FacesMessage; import jakarta.ws.rs.core.MediaType; +import java.text.MessageFormat; import ucar.nc2.NetcdfFile; import ucar.nc2.NetcdfFiles; @@ -146,6 +152,8 @@ public class IngestServiceBean { @EJB AuxiliaryFileServiceBean auxiliaryFileService; @EJB + StorageUseServiceBean storageUseService; + @EJB 
SystemConfig systemConfig; @Resource(lookup = "java:app/jms/queue/ingest") @@ -158,7 +166,8 @@ public class IngestServiceBean { private static String dateTimeFormat_ymdhmsS = "yyyy-MM-dd HH:mm:ss.SSS"; private static String dateFormat_ymd = "yyyy-MM-dd"; - // This method tries to permanently store new files on the filesystem. + // This method tries to permanently store new files in storage (on the filesystem, + // in an S3 bucket, etc.). // Then it adds the files that *have been successfully saved* to the // dataset (by attaching the DataFiles to the Dataset, and the corresponding // FileMetadatas to the DatasetVersion). It also tries to ensure that none @@ -167,282 +176,386 @@ public class IngestServiceBean { // DataFileCategory objects, if any were already assigned to the files). // It must be called before we attempt to permanently save the files in // the database by calling the Save command on the dataset and/or version. + + // !! There is way too much going on in this method. :( !! + + // @todo: Is this method a good candidate for turning into a dedicated Command? public List saveAndAddFilesToDataset(DatasetVersion version, - List newFiles, - DataFile fileToReplace, - boolean tabIngest) { - List ret = new ArrayList<>(); - - if (newFiles != null && newFiles.size() > 0) { - // ret = new ArrayList<>(); - // final check for duplicate file names; - // we tried to make the file names unique on upload, but then - // the user may have edited them on the "add files" page, and - // renamed FOOBAR-1.txt back to FOOBAR.txt... + List newFiles, + DataFile fileToReplace, + boolean tabIngest) { + UploadSessionQuotaLimit uploadSessionQuota = null; + List ret = new ArrayList<>(); + + if (newFiles != null && newFiles.size() > 0) { + // ret = new ArrayList<>(); + // final check for duplicate file names; + // we tried to make the file names unique on upload, but then + // the user may have edited them on the "add files" page, and + // renamed FOOBAR-1.txt back to FOOBAR.txt... 
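// Illustration with hypothetical values, not part of this patch: if the
// version already contains FOOBAR.txt and the user renamed an incoming
// FOOBAR-1.txt back to FOOBAR.txt on the "add files" page, the final check
// below is expected to make the incoming name unique again, e.g.:
//
//     existing: ["FOOBAR.txt"]   incoming: ["FOOBAR.txt"]
//     after checkForDuplicateFileNamesFinal: incoming -> ["FOOBAR-1.txt"]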
IngestUtil.checkForDuplicateFileNamesFinal(version, newFiles, fileToReplace); - Dataset dataset = version.getDataset(); - - for (DataFile dataFile : newFiles) { - boolean unattached = false; - boolean savedSuccess = false; - if (dataFile.getOwner() == null) { - unattached = true; - dataFile.setOwner(dataset); - } + Dataset dataset = version.getDataset(); + long totalBytesSaved = 0L; - String[] storageInfo = DataAccess.getDriverIdAndStorageLocation(dataFile.getStorageIdentifier()); - String driverType = DataAccess.getDriverType(storageInfo[0]); - String storageLocation = storageInfo[1]; - String tempFileLocation = null; - Path tempLocationPath = null; - if (driverType.equals("tmp")) { //"tmp" is the default if no prefix or the "tmp://" driver - tempFileLocation = FileUtil.getFilesTempDirectory() + "/" + storageLocation; - - // Try to save the file in its permanent location: - tempLocationPath = Paths.get(tempFileLocation); - WritableByteChannel writeChannel = null; - FileChannel readChannel = null; - - StorageIO dataAccess = null; - - try { - logger.fine("Attempting to create a new storageIO object for " + storageLocation); - dataAccess = DataAccess.createNewStorageIO(dataFile, storageLocation); - - logger.fine("Successfully created a new storageIO object."); - /* - * This commented-out code demonstrates how to copy bytes from a local - * InputStream (or a readChannel) into the writable byte channel of a Dataverse - * DataAccessIO object: - */ - - /* - * storageIO.open(DataAccessOption.WRITE_ACCESS); - * - * writeChannel = storageIO.getWriteChannel(); readChannel = new - * FileInputStream(tempLocationPath.toFile()).getChannel(); - * - * long bytesPerIteration = 16 * 1024; // 16K bytes long start = 0; while ( - * start < readChannel.size() ) { readChannel.transferTo(start, - * bytesPerIteration, writeChannel); start += bytesPerIteration; } - */ - - /* - * But it's easier to use this convenience method from the DataAccessIO: - * - * (if the underlying storage method for this file is local filesystem, the - * DataAccessIO will simply copy the file using Files.copy, like this: - * - * Files.copy(tempLocationPath, storageIO.getFileSystemLocation(), - * StandardCopyOption.REPLACE_EXISTING); - */ - dataAccess.savePath(tempLocationPath); - - // Set filesize in bytes - // - dataFile.setFilesize(dataAccess.getSize()); - savedSuccess = true; - logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); - - // TODO: reformat this file to remove the many tabs added in cc08330 - extractMetadataNcml(dataFile, tempLocationPath); - - } catch (IOException ioex) { - logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); - } finally { - if (readChannel != null) { - try { - readChannel.close(); - } catch (IOException e) { - } - } - if (writeChannel != null) { - try { - writeChannel.close(); - } catch (IOException e) { - } - } - } + if (systemConfig.isStorageQuotasEnforced()) { + // Check if this dataset is subject to any storage quotas: + uploadSessionQuota = fileService.getUploadSessionQuotaLimit(dataset); + } + + for (DataFile dataFile : newFiles) { + boolean unattached = false; + boolean savedSuccess = false; + if (dataFile.getOwner() == null) { + // is it ever "attached"? + // do we ever call this method with dataFile.getOwner() != null? + // - we really shouldn't be, either. 
+ unattached = true; + dataFile.setOwner(dataset); + } + + String[] storageInfo = DataAccess.getDriverIdAndStorageLocation(dataFile.getStorageIdentifier()); + String driverType = DataAccess.getDriverType(storageInfo[0]); + String storageLocation = storageInfo[1]; + String tempFileLocation = null; + Path tempLocationPath = null; + long confirmedFileSize = 0L; + if (driverType.equals("tmp")) { //"tmp" is the default if no prefix or the "tmp://" driver + tempFileLocation = FileUtil.getFilesTempDirectory() + "/" + storageLocation; + + // Try to save the file in its permanent location: + tempLocationPath = Paths.get(tempFileLocation); + WritableByteChannel writeChannel = null; + FileChannel readChannel = null; + + StorageIO dataAccess = null; + + try { + logger.fine("Attempting to create a new storageIO object for " + storageLocation); + dataAccess = DataAccess.createNewStorageIO(dataFile, storageLocation); + + logger.fine("Successfully created a new storageIO object."); + /** + * This commented-out code demonstrates how to copy + * bytes from a local InputStream (or a readChannel) + * into the writable byte channel of a Dataverse + * DataAccessIO object: + */ + + /** + * storageIO.open(DataAccessOption.WRITE_ACCESS); + * + * writeChannel = storageIO.getWriteChannel(); + * readChannel = new + * FileInputStream(tempLocationPath.toFile()).getChannel(); + * + * long bytesPerIteration = 16 * 1024; // 16K bytes long + * start = 0; + * while ( start < readChannel.size() ) { + * readChannel.transferTo(start, bytesPerIteration, writeChannel); start += bytesPerIteration; + * } + */ + + /** + * But it's easier to use this convenience method from + * the DataAccessIO: + * + * (if the underlying storage method for this file is + * local filesystem, the DataAccessIO will simply copy + * the file using Files.copy, like this: + * + * Files.copy(tempLocationPath, + * storageIO.getFileSystemLocation(), + * StandardCopyOption.REPLACE_EXISTING); + */ + dataAccess.savePath(tempLocationPath); + + // Set filesize in bytes + // + confirmedFileSize = dataAccess.getSize(); + dataFile.setFilesize(confirmedFileSize); + savedSuccess = true; + logger.fine("Success: permanently saved file " + dataFile.getFileMetadata().getLabel()); + + // TODO: reformat this file to remove the many tabs added in cc08330 - done, I think? + extractMetadataNcml(dataFile, tempLocationPath); + + } catch (IOException ioex) { + logger.warning("Failed to save the file, storage id " + dataFile.getStorageIdentifier() + " (" + ioex.getMessage() + ")"); + } finally { + if (readChannel != null) { + try { + readChannel.close(); + } catch (IOException e) { + } + } + if (writeChannel != null) { + try { + writeChannel.close(); + } catch (IOException e) { + } + } + } // Since we may have already spent some CPU cycles scaling down image thumbnails, - // we may as well save them, by moving these generated images to the permanent - // dataset directory. We should also remember to delete any such files in the - // temp directory: - List generatedTempFiles = listGeneratedTempFiles(Paths.get(FileUtil.getFilesTempDirectory()), - storageLocation); - if (generatedTempFiles != null) { - for (Path generated : generatedTempFiles) { - if (savedSuccess) { // no need to try to save this aux file permanently, if we've failed to - // save the main file! 
- logger.fine("(Will also try to permanently save generated thumbnail file " - + generated.toString() + ")"); - try { - // Files.copy(generated, Paths.get(dataset.getFileSystemDirectory().toString(), - // generated.getFileName().toString())); - int i = generated.toString().lastIndexOf("thumb"); - if (i > 1) { - String extensionTag = generated.toString().substring(i); - dataAccess.savePathAsAux(generated, extensionTag); - logger.fine( - "Saved generated thumbnail as aux object. \"preview available\" status: " - + dataFile.isPreviewImageAvailable()); - } else { - logger.warning( - "Generated thumbnail file name does not match the expected pattern: " - + generated.toString()); - } - - } catch (IOException ioex) { - logger.warning("Failed to save generated file " + generated.toString()); - } - } - - // ... but we definitely want to delete it: - try { - Files.delete(generated); - } catch (IOException ioex) { - logger.warning("Failed to delete generated file " + generated.toString()); - } - } - } - // Any necessary post-processing: - // performPostProcessingTasks(dataFile); - } else { - try { - StorageIO dataAccess = DataAccess.getStorageIO(dataFile); - //Populate metadata - dataAccess.open(DataAccessOption.READ_ACCESS); - //set file size - logger.fine("Setting file size: " + dataAccess.getSize()); - dataFile.setFilesize(dataAccess.getSize()); - if(dataAccess instanceof S3AccessIO) { - ((S3AccessIO)dataAccess).removeTempTag(); - } - } catch (IOException ioex) { - logger.warning("Failed to get file size, storage id " + dataFile.getStorageIdentifier() + " (" - + ioex.getMessage() + ")"); - } - savedSuccess = true; - } + // we may as well save them, by moving these generated images to the permanent + // dataset directory. We should also remember to delete any such files in the + // temp directory: + List generatedTempFiles = listGeneratedTempFiles(Paths.get(FileUtil.getFilesTempDirectory()), + storageLocation); + if (generatedTempFiles != null) { + for (Path generated : generatedTempFiles) { + if (savedSuccess) { // no need to try to save this aux file permanently, if we've failed to + // save the main file! + logger.fine("(Will also try to permanently save generated thumbnail file " + + generated.toString() + ")"); + try { + // Files.copy(generated, Paths.get(dataset.getFileSystemDirectory().toString(), + // generated.getFileName().toString())); + int i = generated.toString().lastIndexOf("thumb"); + if (i > 1) { + String extensionTag = generated.toString().substring(i); + dataAccess.savePathAsAux(generated, extensionTag); + logger.fine( + "Saved generated thumbnail as aux object. \"preview available\" status: " + + dataFile.isPreviewImageAvailable()); + } else { + logger.warning( + "Generated thumbnail file name does not match the expected pattern: " + + generated.toString()); + } - logger.fine("Done! Finished saving new files in permanent storage and adding them to the dataset."); - boolean belowLimit = false; - - try { - //getting StorageIO may require knowing the owner (so this must come before owner is potentially set back to null - belowLimit = dataFile.getStorageIO().isBelowIngestSizeLimit(); - } catch (IOException e) { - logger.warning("Error getting ingest limit for file: " + dataFile.getIdentifier() + " : " + e.getMessage()); - } - - if (savedSuccess && belowLimit) { - // These are all brand new files, so they should all have - // one filemetadata total. -- L.A. 
- FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0); - String fileName = fileMetadata.getLabel(); - - boolean metadataExtracted = false; - boolean metadataExtractedFromNetcdf = false; - if (tabIngest && FileUtil.canIngestAsTabular(dataFile)) { - /* - * Note that we don't try to ingest the file right away - instead we mark it as - * "scheduled for ingest", then at the end of the save process it will be queued - * for async. ingest in the background. In the meantime, the file will be - * ingested as a regular, non-tabular file, and appear as such to the user, - * until the ingest job is finished with the Ingest Service. - */ - dataFile.SetIngestScheduled(); - } else if (fileMetadataExtractable(dataFile)) { - - try { - // FITS is the only type supported for metadata - // extraction, as of now. -- L.A. 4.0 - // Note that extractMetadataNcml() is used for NetCDF/HDF5. - dataFile.setContentType("application/fits"); - metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); - } catch (IOException mex) { - logger.severe("Caught exception trying to extract indexable metadata from file " - + fileName + ", " + mex.getMessage()); - } - if (metadataExtracted) { - logger.fine("Successfully extracted indexable metadata from file " + fileName); - } else { - logger.fine("Failed to extract indexable metadata from file " + fileName); - } - } else if (fileMetadataExtractableFromNetcdf(dataFile, tempLocationPath)) { - try { - logger.fine("trying to extract metadata from netcdf"); - metadataExtractedFromNetcdf = extractMetadataFromNetcdf(tempFileLocation, dataFile, version); - } catch (IOException ex) { - logger.fine("could not extract metadata from netcdf: " + ex); - } - if (metadataExtractedFromNetcdf) { - logger.fine("Successfully extracted indexable metadata from netcdf file " + fileName); - } else { - logger.fine("Failed to extract indexable metadata from netcdf file " + fileName); - } + } catch (IOException ioex) { + logger.warning("Failed to save generated file " + generated.toString()); + } + } - } else if (FileUtil.MIME_TYPE_INGESTED_FILE.equals(dataFile.getContentType())) { + // ... but we definitely want to delete it: + try { + Files.delete(generated); + } catch (IOException ioex) { + logger.warning("Failed to delete generated file " + generated.toString()); + } + } + } + // Any necessary post-processing: + // performPostProcessingTasks(dataFile); + } else { + // This is a direct upload + try { + StorageIO dataAccess = DataAccess.getStorageIO(dataFile); + //Populate metadata + dataAccess.open(DataAccessOption.READ_ACCESS); + + confirmedFileSize = dataAccess.getSize(); + + // For directly-uploaded files, we will perform the file size + // limit and quota checks here. Perform them *again*, in + // some cases: a directly uploaded files have already been + // checked (for the sake of being able to reject the upload + // before the user clicks "save"). But in case of direct + // uploads via API, these checks haven't been performed yet, + // so, here's our chance. 
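// Worked example with made-up numbers, not from this patch: suppose the
// store's per-file limit is 1 GB and the upload session quota has 300 MB
// remaining. A directly-uploaded 500 MB file passes the per-file size check
// below (500 MB < 1 GB), but the quota check further down then rejects it
// (500 MB > 300 MB remaining) and flips savedSuccess back to false.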
+ + Long fileSizeLimit = systemConfig.getMaxFileUploadSizeForStore(version.getDataset().getEffectiveStorageDriverId()); + + if (fileSizeLimit == null || confirmedFileSize < fileSizeLimit) { + + //set file size + logger.fine("Setting file size: " + confirmedFileSize); + dataFile.setFilesize(confirmedFileSize); + + if (dataAccess instanceof S3AccessIO) { + ((S3AccessIO) dataAccess).removeTempTag(); + } + savedSuccess = true; + } + } catch (IOException ioex) { + logger.warning("Failed to get file size, storage id, or failed to remove the temp tag on the saved S3 object" + dataFile.getStorageIdentifier() + " (" + + ioex.getMessage() + ")"); + } + } + + // If quotas are enforced, we will perform a quota check here. + // If this is an upload via the UI, we must have already + // performed this check once. But it is possible that somebody + // else may have added more data to the same collection/dataset + // etc., before this user was ready to click "save", so this is + // necessary. For other cases, such as the direct uploads via + // the API, this is the single point in the workflow where + // storage quotas are enforced. + + if (savedSuccess) { + if (uploadSessionQuota != null) { + // It may be worth considering refreshing the quota here, + // and incrementing the Storage Use record for + // all the parent objects in real time, as + // *each* individual file is being saved. I experimented + // with that, but decided against it for performance + // reasons. But yes, there may be some edge case where + // parallel multi-file uploads can end up being able + // to save 2X worth the quota that was available at the + // beginning of each session. + if (confirmedFileSize > uploadSessionQuota.getRemainingQuotaInBytes()) { + savedSuccess = false; + logger.warning("file size over quota limit, skipping"); + // @todo: we need to figure out how to better communicate + // this (potentially partial) failure to the user. + //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(confirmedFileSize), bytesToHumanReadable(storageQuotaLimit))); + } else { + // Adjust quota: + logger.info("Setting total usage in bytes to " + (uploadSessionQuota.getTotalUsageInBytes() + confirmedFileSize)); + uploadSessionQuota.setTotalUsageInBytes(uploadSessionQuota.getTotalUsageInBytes() + confirmedFileSize); + } + } + + // ... unless we had to reject the file just now because of + // the quota limits, count the number of bytes saved for the + // purposes of incrementing the total storage of the parent + // DvObjectContainers: + + if (savedSuccess) { + totalBytesSaved += confirmedFileSize; + } + } + + logger.fine("Done! Finished saving new file in permanent storage and adding it to the dataset."); + boolean belowLimit = false; + + try { + //getting StorageIO may require knowing the owner (so this must come before owner is potentially set back to null + belowLimit = dataFile.getStorageIO().isBelowIngestSizeLimit(); + } catch (IOException e) { + logger.warning("Error getting ingest limit for file: " + dataFile.getIdentifier() + " : " + e.getMessage()); + } + + if (savedSuccess && belowLimit) { + // These are all brand new files, so they should all have + // one filemetadata total. -- L.A. 
+ FileMetadata fileMetadata = dataFile.getFileMetadatas().get(0); + String fileName = fileMetadata.getLabel(); + + boolean metadataExtracted = false; + boolean metadataExtractedFromNetcdf = false; + if (tabIngest && FileUtil.canIngestAsTabular(dataFile)) { + /** + * Note that we don't try to ingest the file right away + * - instead we mark it as "scheduled for ingest", then + * at the end of the save process it will be queued for + * async. ingest in the background. In the meantime, the + * file will be ingested as a regular, non-tabular file, + * and appear as such to the user, until the ingest job + * is finished with the Ingest Service. + */ + dataFile.SetIngestScheduled(); + } else if (fileMetadataExtractable(dataFile)) { + + try { + // FITS is the only type supported for metadata + // extraction, as of now. -- L.A. 4.0 + // Note that extractMetadataNcml() is used for NetCDF/HDF5. + dataFile.setContentType("application/fits"); + metadataExtracted = extractMetadata(tempFileLocation, dataFile, version); + } catch (IOException mex) { + logger.severe("Caught exception trying to extract indexable metadata from file " + + fileName + ", " + mex.getMessage()); + } + if (metadataExtracted) { + logger.fine("Successfully extracted indexable metadata from file " + fileName); + } else { + logger.fine("Failed to extract indexable metadata from file " + fileName); + } + } else if (fileMetadataExtractableFromNetcdf(dataFile, tempLocationPath)) { + try { + logger.fine("trying to extract metadata from netcdf"); + metadataExtractedFromNetcdf = extractMetadataFromNetcdf(tempFileLocation, dataFile, version); + } catch (IOException ex) { + logger.fine("could not extract metadata from netcdf: " + ex); + } + if (metadataExtractedFromNetcdf) { + logger.fine("Successfully extracted indexable metadata from netcdf file " + fileName); + } else { + logger.fine("Failed to extract indexable metadata from netcdf file " + fileName); + } + + } else if (FileUtil.MIME_TYPE_INGESTED_FILE.equals(dataFile.getContentType())) { // Make sure no *uningested* tab-delimited files are saved with the type "text/tab-separated-values"! // "text/tsv" should be used instead: dataFile.setContentType(FileUtil.MIME_TYPE_TSV); } - } + } if (unattached) { dataFile.setOwner(null); } - // ... and let's delete the main temp file if it exists: - if(tempLocationPath!=null) { - try { - logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString()); - Files.delete(tempLocationPath); - } catch (IOException ex) { - // (non-fatal - it's just a temp file.) 
- logger.warning("Failed to delete temp file " + tempLocationPath.toString()); - } - } - if (savedSuccess) { - // temp dbug line - // System.out.println("ADDING FILE: " + fileName + "; for dataset: " + - // dataset.getGlobalId()); - // Make sure the file is attached to the dataset and to the version, if this - // hasn't been done yet: - if (dataFile.getOwner() == null) { - dataFile.setOwner(dataset); - - version.getFileMetadatas().add(dataFile.getFileMetadata()); - dataFile.getFileMetadata().setDatasetVersion(version); - dataset.getFiles().add(dataFile); - - if (dataFile.getFileMetadata().getCategories() != null) { - ListIterator dfcIt = dataFile.getFileMetadata().getCategories() - .listIterator(); - - while (dfcIt.hasNext()) { - DataFileCategory dataFileCategory = dfcIt.next(); - - if (dataFileCategory.getDataset() == null) { - DataFileCategory newCategory = dataset - .getCategoryByName(dataFileCategory.getName()); - if (newCategory != null) { - newCategory.addFileMetadata(dataFile.getFileMetadata()); - // dataFileCategory = newCategory; - dfcIt.set(newCategory); - } else { - dfcIt.remove(); - } - } - } - } - } - } + // ... and let's delete the main temp file if it exists: + if (tempLocationPath != null) { + try { + logger.fine("Will attempt to delete the temp file " + tempLocationPath.toString()); + Files.delete(tempLocationPath); + } catch (IOException ex) { + // (non-fatal - it's just a temp file.) + logger.warning("Failed to delete temp file " + tempLocationPath.toString()); + } + } + if (savedSuccess) { + // temp dbug line + // System.out.println("ADDING FILE: " + fileName + "; for dataset: " + + // dataset.getGlobalId()); + // Make sure the file is attached to the dataset and to the version, if this + // hasn't been done yet: + // @todo: but shouldn't we be doing the reverse if we haven't been + // able to save the file? - disconnect it from the dataset and + // the version?? - L.A. 2023 + // (that said, is there *ever* a case where dataFile.getOwner() != null ?) + if (dataFile.getOwner() == null) { + dataFile.setOwner(dataset); + + version.getFileMetadatas().add(dataFile.getFileMetadata()); + dataFile.getFileMetadata().setDatasetVersion(version); + dataset.getFiles().add(dataFile); + + if (dataFile.getFileMetadata().getCategories() != null) { + ListIterator dfcIt = dataFile.getFileMetadata().getCategories() + .listIterator(); + + while (dfcIt.hasNext()) { + DataFileCategory dataFileCategory = dfcIt.next(); + + if (dataFileCategory.getDataset() == null) { + DataFileCategory newCategory = dataset.getCategoryByName(dataFileCategory.getName()); + if (newCategory != null) { + newCategory.addFileMetadata(dataFile.getFileMetadata()); + // dataFileCategory = newCategory; + dfcIt.set(newCategory); + } else { + dfcIt.remove(); + } + } + } + } + } + + // Hmm. Noticing that the following two things - adding the + // files to the return list were being + // done outside of this "if (savedSuccess)" block. I'm pretty + // sure that was wrong. - L.A. 11-30-2023 + ret.add(dataFile); + // (unless that is that return value isn't used for anything - ?) + } - ret.add(dataFile); - } - } + } + // Update storage use for all the parent dvobjects: + logger.info("Incrementing recorded storage use by " + totalBytesSaved + " bytes for dataset " + dataset.getId()); + // Q. Need to consider what happens when this code is called on Create? + // A. It works on create as well, yes. (the recursive increment + // query in the method below does need the parent dataset to + // have the database id. 
But even if these files have been + // uploaded on the Create form, we first save the dataset, and + // then add the files to it. - L.A. + storageUseService.incrementStorageSizeRecursively(dataset.getId(), totalBytesSaved); + } - return ret; - } + return ret; + } public List listGeneratedTempFiles(Path tempDirectory, String baseName) { List generatedFiles = new ArrayList<>(); @@ -613,27 +726,17 @@ public void produceSummaryStatistics(DataFile dataFile, File generatedTabularFil } public void produceContinuousSummaryStatistics(DataFile dataFile, File generatedTabularFile) throws IOException { - - /* - // quick, but memory-inefficient way: - // - this method just loads the entire file-worth of continuous vectors - // into a Double[][] matrix. - //Double[][] variableVectors = subsetContinuousVectors(dataFile); - //calculateContinuousSummaryStatistics(dataFile, variableVectors); - - // A more sophisticated way: this subsets one column at a time, using - // the new optimized subsetting that does not have to read any extra - // bytes from the file to extract the column: - - TabularSubsetGenerator subsetGenerator = new TabularSubsetGenerator(); - */ for (int i = 0; i < dataFile.getDataTable().getVarQuantity(); i++) { if (dataFile.getDataTable().getDataVariables().get(i).isIntervalContinuous()) { logger.fine("subsetting continuous vector"); if ("float".equals(dataFile.getDataTable().getDataVariables().get(i).getFormat())) { - Float[] variableVector = TabularSubsetGenerator.subsetFloatVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Float[] variableVector = TabularSubsetGenerator.subsetFloatVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); logger.fine("Calculating summary statistics on a Float vector;"); calculateContinuousSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: @@ -641,7 +744,11 @@ public void produceContinuousSummaryStatistics(DataFile dataFile, File generated calculateUNF(dataFile, i, variableVector); variableVector = null; } else { - Double[] variableVector = TabularSubsetGenerator.subsetDoubleVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Double[] variableVector = TabularSubsetGenerator.subsetDoubleVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); logger.fine("Calculating summary statistics on a Double vector;"); calculateContinuousSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: @@ -663,7 +770,11 @@ public void produceDiscreteNumericSummaryStatistics(DataFile dataFile, File gene && dataFile.getDataTable().getDataVariables().get(i).isTypeNumeric()) { logger.fine("subsetting discrete-numeric vector"); - Long[] variableVector = TabularSubsetGenerator.subsetLongVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + Long[] variableVector = TabularSubsetGenerator.subsetLongVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); // We are discussing calculating the same summary stats for // all numerics (the same kind of sumstats that we've been calculating // for numeric continuous 
type) -- L.A. Jul. 2014 @@ -697,7 +808,11 @@ public void produceCharacterSummaryStatistics(DataFile dataFile, File generatedT if (dataFile.getDataTable().getDataVariables().get(i).isTypeCharacter()) { logger.fine("subsetting character vector"); - String[] variableVector = TabularSubsetGenerator.subsetStringVector(new FileInputStream(generatedTabularFile), i, dataFile.getDataTable().getCaseQuantity().intValue()); + String[] variableVector = TabularSubsetGenerator.subsetStringVector( + new FileInputStream(generatedTabularFile), + i, + dataFile.getDataTable().getCaseQuantity().intValue(), + dataFile.getDataTable().isStoredWithVariableHeader()); //calculateCharacterSummaryStatistics(dataFile, i, variableVector); // calculate the UNF while we are at it: logger.fine("Calculating UNF on a String vector"); @@ -715,20 +830,29 @@ public static void produceFrequencyStatistics(DataFile dataFile, File generatedT produceFrequencies(generatedTabularFile, vars); } - public static void produceFrequencies( File generatedTabularFile, List vars) throws IOException { + public static void produceFrequencies(File generatedTabularFile, List vars) throws IOException { for (int i = 0; i < vars.size(); i++) { Collection cats = vars.get(i).getCategories(); int caseQuantity = vars.get(i).getDataTable().getCaseQuantity().intValue(); boolean isNumeric = vars.get(i).isTypeNumeric(); + boolean skipVariableHeaderLine = vars.get(i).getDataTable().isStoredWithVariableHeader(); Object[] variableVector = null; if (cats.size() > 0) { if (isNumeric) { - variableVector = TabularSubsetGenerator.subsetFloatVector(new FileInputStream(generatedTabularFile), i, caseQuantity); + variableVector = TabularSubsetGenerator.subsetFloatVector( + new FileInputStream(generatedTabularFile), + i, + caseQuantity, + skipVariableHeaderLine); } else { - variableVector = TabularSubsetGenerator.subsetStringVector(new FileInputStream(generatedTabularFile), i, caseQuantity); + variableVector = TabularSubsetGenerator.subsetStringVector( + new FileInputStream(generatedTabularFile), + i, + caseQuantity, + skipVariableHeaderLine); } if (variableVector != null) { Hashtable freq = calculateFrequency(variableVector); @@ -810,6 +934,7 @@ public boolean ingestAsTabular(Long datafile_id) { DataFile dataFile = fileService.find(datafile_id); boolean ingestSuccessful = false; boolean forceTypeCheck = false; + boolean storingWithVariableHeader = systemConfig.isStoringIngestedFilesWithHeaders(); // Never attempt to ingest a file that's already ingested! 
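Taken together, the changes above and below introduce one new ingest behavior: when the installation is configured to store ingested files with headers, each tabular reader writes the tab-separated variable names as the first line of the generated .tab file, the fact is recorded on the DataTable via setStoredWithVariableHeader(true), and the subsetting calls above pass that flag so they know to skip the extra line. A minimal, self-contained sketch of the on-disk difference (file name, variable names, and values are illustrative, not taken from this PR):

    import java.io.PrintWriter;

    public class VariableHeaderSketch {
        public static void main(String[] args) throws Exception {
            // stands in for systemConfig.isStoringIngestedFilesWithHeaders()
            boolean storeWithVariableHeader = true;
            try (PrintWriter pwout = new PrintWriter("data.tab", "UTF-8")) {
                if (storeWithVariableHeader) {
                    pwout.println("name\tage"); // header line, written only when the option is on
                }
                pwout.println("\"Alice\"\t34"); // data rows are identical in both modes
                pwout.println("\"Bob\"\t27");
            }
        }
    }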
if (dataFile.isTabularData()) { @@ -911,11 +1036,7 @@ public boolean ingestAsTabular(Long datafile_id) { TabularDataIngest tabDataIngest = null; try { - if (additionalData != null) { - tabDataIngest = ingestPlugin.read(inputStream, additionalData); - } else { - tabDataIngest = ingestPlugin.read(inputStream, null); - } + tabDataIngest = ingestPlugin.read(inputStream, storingWithVariableHeader, additionalData); } catch (IOException ingestEx) { dataFile.SetIngestProblem(); FileUtil.createIngestFailureReport(dataFile, ingestEx.getMessage()); @@ -968,6 +1089,7 @@ public boolean ingestAsTabular(Long datafile_id) { dataFile.setDataTable(tabDataIngest.getDataTable()); tabDataIngest.getDataTable().setDataFile(dataFile); tabDataIngest.getDataTable().setOriginalFileName(originalFileName); + dataFile.getDataTable().setStoredWithVariableHeader(storingWithVariableHeader); try { produceSummaryStatistics(dataFile, tabFile); @@ -1030,7 +1152,14 @@ public boolean ingestAsTabular(Long datafile_id) { } } - if (!databaseSaveSuccessful) { + if (databaseSaveSuccessful) { + // Add the size of the tab-delimited version of the data file + // that we have produced and stored to the recorded storage + // size of all the ancestor DvObjectContainers: + if (dataFile.getFilesize() > 0) { + storageUseService.incrementStorageSizeRecursively(dataFile.getOwner().getId(), dataFile.getFilesize()); + } + } else { logger.warning("Ingest failure (failed to save the tabular data in the database; file left intact as uploaded)."); return false; } @@ -1052,6 +1181,7 @@ public boolean ingestAsTabular(Long datafile_id) { // Replace contents of the file with the tab-delimited data produced: dataAccess.savePath(Paths.get(tabFile.getAbsolutePath())); + // Reset the file size: dataFile.setFilesize(dataAccess.getSize()); @@ -2177,7 +2307,7 @@ public static void main(String[] args) { TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { System.err.println("Caught an exception trying to ingest file "+file+"."); System.exit(1); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java index 223b171dfb5..0f23a3d9781 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/TabularDataFileReader.java @@ -20,10 +20,13 @@ package edu.harvard.iq.dataverse.ingest.tabulardata; +import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.ingest.tabulardata.spi.*; //import edu.harvard.iq.dataverse.ingest.plugin.metadata.*; import java.io.*; import static java.lang.System.*; +import java.util.Iterator; +import java.util.List; import java.util.regex.Matcher; /** @@ -98,7 +101,7 @@ public void setDataLanguageEncoding(String dataLanguageEncoding) { * * @throws java.io.IOException if a reading error occurs. 
*/ - public abstract TabularDataIngest read(BufferedInputStream stream, File dataFile) + public abstract TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException; @@ -176,5 +179,26 @@ protected String escapeCharacterString(String rawString) { return escapedString; } + + protected String generateVariableHeader(List dvs) { + String varHeader = null; + + if (dvs != null) { + Iterator iter = dvs.iterator(); + DataVariable dv; + + if (iter.hasNext()) { + dv = iter.next(); + varHeader = dv.getName(); + } + + while (iter.hasNext()) { + dv = iter.next(); + varHeader = varHeader + "\t" + dv.getName(); + } + } + + return varHeader; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java index 57f76df3802..f8816ababb4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReader.java @@ -110,7 +110,7 @@ private void init() throws IOException { * @throws java.io.IOException if a reading error occurs. */ @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean saveWithVariableHeader, File dataFile) throws IOException { init(); if (stream == null) { @@ -124,7 +124,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws File tabFileDestination = File.createTempFile("data-", ".tab"); PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath()); - int lineCount = readFile(localBufferedReader, dataTable, tabFileWriter); + int lineCount = readFile(localBufferedReader, dataTable, saveWithVariableHeader, tabFileWriter); logger.fine("Tab file produced: " + tabFileDestination.getAbsolutePath()); @@ -136,14 +136,17 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws } - public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter finalOut) throws IOException { + public int readFile(BufferedReader csvReader, DataTable dataTable, boolean saveWithVariableHeader, PrintWriter finalOut) throws IOException { List variableList = new ArrayList<>(); CSVParser parser = new CSVParser(csvReader, inFormat.withHeader()); Map headers = parser.getHeaderMap(); int i = 0; + String variableNameHeader = null; + for (String varName : headers.keySet()) { + // @todo: is .keySet() guaranteed to return the names in the right order? if (varName == null || varName.isEmpty()) { // TODO: // Add a sensible variable name validation algorithm. @@ -158,6 +161,13 @@ public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter f dv.setTypeCharacter(); dv.setIntervalDiscrete(); + + if (saveWithVariableHeader) { + variableNameHeader = variableNameHeader == null + ? 
varName + : variableNameHeader.concat("\t" + varName); + } + i++; } @@ -342,6 +352,14 @@ public int readFile(BufferedReader csvReader, DataTable dataTable, PrintWriter f try (BufferedReader secondPassReader = new BufferedReader(new FileReader(firstPassTempFile))) { parser = new CSVParser(secondPassReader, inFormat.withHeader()); String[] caseRow = new String[headers.size()]; + + // Save the variable name header, if requested + if (saveWithVariableHeader) { + if (variableNameHeader == null) { + throw new IOException("failed to generate the Variable Names header"); + } + finalOut.println(variableNameHeader); + } for (CSVRecord record : parser) { if (!record.isConsistent()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java index 2dec701592e..73818f8fb62 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReader.java @@ -505,7 +505,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { dbgLog.info("***** DTAFileReader: read() start *****"); if (dataFile != null) { @@ -519,7 +519,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws if (releaseNumber!=104) { decodeExpansionFields(stream); } - decodeData(stream); + decodeData(stream, storeWithVariableHeader); decodeValueLabels(stream); ingesteddata.setDataTable(dataTable); @@ -1665,7 +1665,7 @@ private void parseValueLabelsReleasel108(BufferedInputStream stream) throws IOEx dbgLog.fine("parseValueLabelsRelease108(): end"); } - private void decodeData(BufferedInputStream stream) throws IOException { + private void decodeData(BufferedInputStream stream, boolean saveWithVariableHeader) throws IOException { dbgLog.fine("\n***** decodeData(): start *****"); @@ -1719,6 +1719,11 @@ private void decodeData(BufferedInputStream stream) throws IOException { BUT, this needs to be reviewed/confirmed etc! */ //String[][] dateFormat = new String[nvar][nobs]; + + // add the variable header here, if needed + if (saveWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } for (int i = 0; i < nobs; i++) { byte[] dataRowBytes = new byte[bytes_per_row]; diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java index 22581834676..53607d541de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReader.java @@ -339,7 +339,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException { logger.fine("NewDTAFileReader: read() start"); // shit ton of diagnostics (still) needed here!! -- L.A. 
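All of the readers touched here delegate the header line to the new TabularDataFileReader.generateVariableHeader() helper added above, which joins the variable names with tab characters and yields null when it has nothing to join. A compact, illustrative equivalent, with DataVariable reduced to a plain name string (the class and method names in this sketch are made up for the example):

    import java.util.List;

    public class HeaderJoinSketch {
        // Same contract as generateVariableHeader(): tab-separated names,
        // null when the list is null or empty.
        static String header(List<String> names) {
            return (names == null || names.isEmpty()) ? null : String.join("\t", names);
        }

        public static void main(String[] args) {
            System.out.println(header(List.of("name", "age", "score"))); // name<TAB>age<TAB>score
        }
    }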
@@ -363,7 +363,13 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws // "characteristics" - STATA-proprietary information // (we are skipping it) readCharacteristics(dataReader); - readData(dataReader); + + String variableHeaderLine = null; + + if (storeWithVariableHeader) { + variableHeaderLine = generateVariableHeader(dataTable.getDataVariables()); + } + readData(dataReader, variableHeaderLine); // (potentially) large, (potentially) non-ASCII character strings // saved outside the section, and referenced @@ -707,7 +713,7 @@ private void readCharacteristics(DataReader reader) throws IOException { } - private void readData(DataReader reader) throws IOException { + private void readData(DataReader reader, String variableHeaderLine) throws IOException { logger.fine("Data section; at offset " + reader.getByteOffset() + "; dta map offset: " + dtaMap.getOffset_data()); logger.fine("readData(): start"); reader.readOpeningTag(TAG_DATA); @@ -731,6 +737,11 @@ private void readData(DataReader reader) throws IOException { FileOutputStream fileOutTab = new FileOutputStream(tabDelimitedDataFile); PrintWriter pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true); + // add the variable header here, if needed + if (variableHeaderLine != null) { + pwout.println(variableHeaderLine); + } + logger.fine("Beginning to read data stream."); for (int i = 0; i < nobs; i++) { @@ -999,6 +1010,8 @@ private void readSTRLs(DataReader reader) throws IOException { int nobs = dataTable.getCaseQuantity().intValue(); String[] line; + + //@todo: adjust for the case of storing the file with the variable header for (int obsindex = 0; obsindex < nobs; obsindex++) { if (scanner.hasNext()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java index c90b0ea6950..2ee966c3e31 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/por/PORFileReader.java @@ -180,7 +180,7 @@ private void init() throws IOException { } @Override - public TabularDataIngest read(BufferedInputStream stream, File additionalData) throws IOException{ + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File additionalData) throws IOException{ dbgLog.fine("PORFileReader: read() start"); if (additionalData != null) { @@ -226,7 +226,7 @@ public TabularDataIngest read(BufferedInputStream stream, File additionalData) t headerId = "8S"; } - decode(headerId, bfReader); + decode(headerId, bfReader, storeWithVariableHeader); // for last iteration @@ -382,7 +382,7 @@ public TabularDataIngest read(BufferedInputStream stream, File additionalData) t return ingesteddata; } - private void decode(String headerId, BufferedReader reader) throws IOException{ + private void decode(String headerId, BufferedReader reader, boolean storeWithVariableHeader) throws IOException{ if (headerId.equals("1")) decodeProductName(reader); else if (headerId.equals("2")) decodeLicensee(reader); else if (headerId.equals("3")) decodeFileLabel(reader); @@ -398,7 +398,7 @@ private void decode(String headerId, BufferedReader reader) throws IOException{ else if (headerId.equals("C")) decodeVariableLabel(reader); else if (headerId.equals("D")) decodeValueLabel(reader); else if (headerId.equals("E")) decodeDocument(reader); - else if 
(headerId.equals("F")) decodeData(reader); + else if (headerId.equals("F")) decodeData(reader, storeWithVariableHeader); } @@ -1099,7 +1099,7 @@ private void decodeDocument(BufferedReader reader) throws IOException { } - private void decodeData(BufferedReader reader) throws IOException { + private void decodeData(BufferedReader reader, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("decodeData(): start"); // TODO: get rid of this "variableTypeFinal"; -- L.A. 4.0 beta int[] variableTypeFinal= new int[varQnty]; @@ -1126,6 +1126,9 @@ private void decodeData(BufferedReader reader) throws IOException { // contents (variable) checker concering decimals Arrays.fill(variableTypeFinal, 0); + if (storeWithVariableHeader) { + pwout.println(StringUtils.join(variableNameList, "\t")); + } // raw-case counter int j = 0; // case diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java index eb1353fd792..50f2f89e354 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RDATAFileReader.java @@ -473,7 +473,7 @@ private void init() throws IOException { * @throws java.io.IOException if a reading error occurs. */ @Override - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException { + public TabularDataIngest read(BufferedInputStream stream, boolean saveWithVariableHeader, File dataFile) throws IOException { init(); @@ -509,7 +509,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws File tabFileDestination = File.createTempFile("data-", ".tab"); PrintWriter tabFileWriter = new PrintWriter(tabFileDestination.getAbsolutePath(), "UTF-8"); - int lineCount = csvFileReader.read(localBufferedReader, dataTable, tabFileWriter); + int lineCount = csvFileReader.read(localBufferedReader, dataTable, saveWithVariableHeader, tabFileWriter); LOG.fine("RDATAFileReader: successfully read "+lineCount+" lines of tab-delimited data."); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java index f60b7733463..fbe7e401b57 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/rdata/RTabFileParser.java @@ -61,8 +61,8 @@ public RTabFileParser (char delimiterChar) { // should be used. - public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout) throws IOException { - dbgLog.warning("RTabFileParser: Inside R Tab file parser"); + public int read(BufferedReader csvReader, DataTable dataTable, boolean saveWithVariableHeader, PrintWriter pwout) throws IOException { + dbgLog.fine("RTabFileParser: Inside R Tab file parser"); int varQnty = 0; @@ -94,14 +94,17 @@ public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout boolean[] isTimeVariable = new boolean[varQnty]; boolean[] isBooleanVariable = new boolean[varQnty]; + String variableNameHeader = null; + if (dataTable.getDataVariables() != null) { for (int i = 0; i < varQnty; i++) { DataVariable var = dataTable.getDataVariables().get(i); if (var == null) { - // throw exception! 
+ throw new IOException ("null dataVariable passed to the parser"); + } if (var.getType() == null) { - // throw exception! + throw new IOException ("null dataVariable type passed to the parser"); } if (var.isTypeCharacter()) { isCharacterVariable[i] = true; @@ -128,13 +131,24 @@ public int read(BufferedReader csvReader, DataTable dataTable, PrintWriter pwout } } } else { - // throw excepion "unknown variable format type" - ? + throw new IOException ("unknown dataVariable format passed to the parser"); } - + if (saveWithVariableHeader) { + variableNameHeader = variableNameHeader == null + ? var.getName() + : variableNameHeader.concat("\t" + var.getName()); + } } } else { - // throw exception! + throw new IOException ("null dataVariables list passed to the parser"); + } + + if (saveWithVariableHeader) { + if (variableNameHeader == null) { + throw new IOException ("failed to generate the Variable Names header"); + } + pwout.println(variableNameHeader); } while ((line = csvReader.readLine()) != null) { diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java index 682b8f1166c..5eecbdfb666 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/sav/SAVFileReader.java @@ -338,7 +338,7 @@ private void init() throws IOException { } } - public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException{ + public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException{ dbgLog.info("SAVFileReader: read() start"); if (dataFile != null) { @@ -422,7 +422,7 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws methodCurrentlyExecuted = "decodeRecordTypeData"; dbgLog.fine("***** SAVFileReader: executing method decodeRecordTypeData"); - decodeRecordTypeData(stream); + decodeRecordTypeData(stream, storeWithVariableHeader); } catch (IllegalArgumentException e) { @@ -2308,7 +2308,7 @@ void decodeRecordType999(BufferedInputStream stream) throws IOException { - void decodeRecordTypeData(BufferedInputStream stream) throws IOException { + void decodeRecordTypeData(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("decodeRecordTypeData(): start"); ///String fileUnfValue = null; @@ -2320,9 +2320,9 @@ void decodeRecordTypeData(BufferedInputStream stream) throws IOException { throw new IllegalArgumentException("stream == null!"); } if (isDataSectionCompressed){ - decodeRecordTypeDataCompressed(stream); + decodeRecordTypeDataCompressed(stream, storeWithVariableHeader); } else { - decodeRecordTypeDataUnCompressed(stream); + decodeRecordTypeDataUnCompressed(stream, storeWithVariableHeader); } /* UNF calculation was here... 
*/ @@ -2362,7 +2362,7 @@ PrintWriter createOutputWriter (BufferedInputStream stream) throws IOException { } - void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOException { + void decodeRecordTypeDataCompressed(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("***** decodeRecordTypeDataCompressed(): start *****"); @@ -2395,7 +2395,10 @@ void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOExcepti dbgLog.fine("printFormatTable:\n" + printFormatTable); variableFormatTypeList = new String[varQnty]; - + // write the variable header out, if instructed to do so + if (storeWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } for (int i = 0; i < varQnty; i++) { variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE.get( @@ -2947,7 +2950,7 @@ void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOExcepti } - void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOException { + void decodeRecordTypeDataUnCompressed(BufferedInputStream stream, boolean storeWithVariableHeader) throws IOException { dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): start *****"); if (stream ==null){ @@ -3013,6 +3016,11 @@ void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOExcep ///dataTable2 = new Object[varQnty][caseQnty]; // storage of date formats to pass to UNF ///dateFormats = new String[varQnty][caseQnty]; + + // write the variable header out, if instructed to do so + if (storeWithVariableHeader) { + pwout.println(generateVariableHeader(dataTable.getDataVariables())); + } try { for (int i = 0; ; i++){ // case-wise loop diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java index ea3f3868f24..ef91793690e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/xlsx/XLSXFileReader.java @@ -36,7 +36,6 @@ import org.apache.commons.lang3.StringUtils; import org.apache.poi.xssf.eventusermodel.XSSFReader; -import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.apache.poi.xssf.model.SharedStrings; import org.apache.poi.openxml4j.opc.OPCPackage; import org.xml.sax.Attributes; @@ -81,7 +80,9 @@ private void init() throws IOException { * @throws java.io.IOException if a reading error occurs. 
 */
     @Override
-    public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws IOException {
+    public TabularDataIngest read(BufferedInputStream stream, boolean storeWithVariableHeader, File dataFile) throws IOException {
+        // @todo: implement handling of the "storeWithVariableHeader" option
+
         init();

         TabularDataIngest ingesteddata = new TabularDataIngest();
@@ -118,6 +119,10 @@ public TabularDataIngest read(BufferedInputStream stream, File dataFile) throws
         String[] caseRow = new String[varQnty];
         String[] valueTokens;

+        // add the variable header here, if needed
+        if (storeWithVariableHeader) {
+            finalWriter.println(generateVariableHeader(dataTable.getDataVariables()));
+        }
         while ((line = secondPassReader.readLine()) != null) {
             // chop the line:
@@ -549,7 +554,7 @@ public static void main(String[] args) throws Exception {
         BufferedInputStream xlsxInputStream = new BufferedInputStream(new FileInputStream(new File(args[0])));

-        TabularDataIngest dataIngest = testReader.read(xlsxInputStream, null);
+        TabularDataIngest dataIngest = testReader.read(xlsxInputStream, false, null);

         dataTable = dataIngest.getDataTable();
diff --git a/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountLoggingServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountLoggingServiceBean.java
index 5edf2fde0c3..c3bf85e699a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountLoggingServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountLoggingServiceBean.java
@@ -46,6 +46,12 @@ public void logEntry(MakeDataCountEntry entry) {
     public String getLogFileName() {
         return "counter_"+new SimpleDateFormat("yyyy-MM-dd").format(new Timestamp(new Date().getTime()))+".log";
     }
+
+    // Sanitize the values to a safe string for the log file
+    static String sanitize(String in) {
+        // Log lines are tab-delimited, so collapse any run of whitespace (tabs, newlines, etc.) into a single space.
+        return in != null ?
in.replaceAll("\\s+", " ") : null; + } public static class MakeDataCountEntry { @@ -367,7 +373,7 @@ public String getTitle() { * @param title the title to set */ public final void setTitle(String title) { - this.title = title; + this.title = sanitize(title); } /** @@ -384,7 +390,7 @@ public String getPublisher() { * @param publisher the publisher to set */ public final void setPublisher(String publisher) { - this.publisher = publisher; + this.publisher = sanitize(publisher); } /** @@ -401,7 +407,7 @@ public String getPublisherId() { * @param publisherId the publisherId to set */ public final void setPublisherId(String publisherId) { - this.publisherId = publisherId; + this.publisherId = sanitize(publisherId); } /** @@ -418,7 +424,7 @@ public String getAuthors() { * @param authors the authors to set */ public final void setAuthors(String authors) { - this.authors = authors; + this.authors = sanitize(authors); } /** @@ -452,7 +458,7 @@ public String getVersion() { * @param version the version to set */ public final void setVersion(String version) { - this.version = version; + this.version = sanitize(version); } /** @@ -469,7 +475,7 @@ public String getOtherId() { * @param otherId the otherId to set */ public void setOtherId(String otherId) { - this.otherId = otherId; + this.otherId = sanitize(otherId); } /** diff --git a/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java new file mode 100644 index 00000000000..2241a2c4ca8 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessState.java @@ -0,0 +1,75 @@ +package edu.harvard.iq.dataverse.makedatacount; + +import jakarta.persistence.*; + +import java.io.Serializable; +import java.sql.Timestamp; +import java.time.Instant; +import java.util.Arrays; + +@Entity +@Table(indexes = {@Index(columnList="yearMonth")}) +public class MakeDataCountProcessState implements Serializable { + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(nullable = false) + private Long id; + + public enum MDCProcessState { + NEW("new"), DONE("done"), SKIP("skip"), PROCESSING("processing"), FAILED("failed"); + private final String text; + private MDCProcessState(final String text) { + this.text = text; + } + public static MDCProcessState fromString(String text) { + if (text != null) { + for (MDCProcessState state : MDCProcessState.values()) { + if (text.equals(state.text)) { + return state; + } + } + } + throw new IllegalArgumentException("State must be one of these values: " + Arrays.asList(MDCProcessState.values()) + "."); + } + @Override + public String toString() { + return text; + } + } + @Column(nullable = false) + private String yearMonth; + @Column(nullable = false) + private MDCProcessState state; + @Column(nullable = true) + private Timestamp stateChangeTimestamp; + + public MakeDataCountProcessState() { } + public MakeDataCountProcessState (String yearMonth, String state) { + this.setYearMonth(yearMonth); + this.setState(state); + } + + public void setYearMonth(String yearMonth) throws IllegalArgumentException { + // Todo: add constraint + if (yearMonth == null || (!yearMonth.matches("\\d{4}-\\d{2}") && !yearMonth.matches("\\d{4}-\\d{2}-\\d{2}"))) { + throw new IllegalArgumentException("YEAR-MONTH date format must be either yyyy-mm or yyyy-mm-dd"); + } + this.yearMonth = yearMonth; + } + public String getYearMonth() { + return this.yearMonth; + } + public void setState(MDCProcessState state) 
{
+        this.state = state;
+        this.stateChangeTimestamp = Timestamp.from(Instant.now());
+    }
+    public void setState(String state) throws IllegalArgumentException {
+        setState(MDCProcessState.fromString(state));
+    }
+    public MDCProcessState getState() {
+        return this.state;
+    }
+    public Timestamp getStateChangeTime() {
+        return stateChangeTimestamp;
+    }
+}
diff --git a/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessStateServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessStateServiceBean.java
new file mode 100644
index 00000000000..5d7ec8ff047
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountProcessStateServiceBean.java
@@ -0,0 +1,61 @@
+package edu.harvard.iq.dataverse.makedatacount;
+
+import jakarta.ejb.EJBException;
+import jakarta.ejb.Stateless;
+import jakarta.inject.Named;
+import jakarta.persistence.EntityManager;
+import jakarta.persistence.PersistenceContext;
+import jakarta.persistence.Query;
+
+import java.util.List;
+
+@Named
+@Stateless
+public class MakeDataCountProcessStateServiceBean {
+
+    @PersistenceContext(unitName = "VDCNet-ejbPU")
+    protected EntityManager em;
+
+    public MakeDataCountProcessState getMakeDataCountProcessState(String yearMonth) {
+        validateYearMonth(yearMonth);
+        MakeDataCountProcessState mdcps = null;
+        // bind the (already validated) yearMonth as a parameter rather than concatenating it into the JPQL
+        Query query = em.createQuery("SELECT d FROM MakeDataCountProcessState d WHERE d.yearMonth = :yearMonth");
+        query.setParameter("yearMonth", yearMonth);
+        List resultList = query.getResultList();
+        if (resultList.size() > 1) {
+            throw new EJBException("More than one MakeDataCount Process State record found for YearMonth " + yearMonth + ".");
+        }
+        if (resultList.size() == 1) {
+            mdcps = (MakeDataCountProcessState) resultList.get(0);
+        }
+        return mdcps;
+    }
+
+    public MakeDataCountProcessState setMakeDataCountProcessState(String yearMonth, String state) {
+        MakeDataCountProcessState mdcps = getMakeDataCountProcessState(yearMonth);
+        if (mdcps == null) {
+            mdcps = new MakeDataCountProcessState(yearMonth, state);
+        } else {
+            mdcps.setState(state);
+        }
+        return em.merge(mdcps);
+    }
+
+    public boolean deleteMakeDataCountProcessState(String yearMonth) {
+        MakeDataCountProcessState mdcps = getMakeDataCountProcessState(yearMonth);
+        if (mdcps == null) {
+            return false;
+        } else {
+            em.remove(mdcps);
+            em.flush();
+            return true;
+        }
+    }
+
+    private void validateYearMonth(String yearMonth) {
+        // Check yearMonth format:
either yyyy-mm or yyyy-mm-dd + if (yearMonth == null || (!yearMonth.matches("\\d{4}-\\d{2}") && !yearMonth.matches("\\d{4}-\\d{2}-\\d{2}"))) { + throw new IllegalArgumentException("YEAR-MONTH date format must be either yyyy-mm or yyyy-mm-dd"); + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 79369207963..a74474efa15 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -51,7 +51,7 @@ public class MetricsServiceBean implements Serializable { /** Dataverses */ - + public JsonArray getDataversesTimeSeries(UriInfo uriInfo, Dataverse d) { Query query = em.createNativeQuery("" + "select distinct to_char(date_trunc('month', dvobject.publicationdate),'YYYY-MM') as month, count(date_trunc('month', dvobject.publicationdate))\n" @@ -64,7 +64,7 @@ public JsonArray getDataversesTimeSeries(UriInfo uriInfo, Dataverse d) { List results = query.getResultList(); return MetricsUtil.timeSeriesToJson(results); } - + /** * @param yyyymm Month in YYYY-MM format. * @param d @@ -129,9 +129,9 @@ public List dataversesBySubject(Dataverse d) { /** Datasets */ - + public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dataverse d) { - Query query = em.createNativeQuery( + Query query = em.createNativeQuery( "select distinct date, count(dataset_id)\n" + "from (\n" + "select min(to_char(COALESCE(releasetime, createtime), 'YYYY-MM')) as date, dataset_id\n" @@ -149,8 +149,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat List results = query.getResultList(); return MetricsUtil.timeSeriesToJson(results); } - - + + /** * @param yyyymm Month in YYYY-MM format. * @param d @@ -180,10 +180,10 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { // But do not use this notation if you need the values returned to // meaningfully identify the datasets! - + Query query = em.createNativeQuery( - - + + "select count(*)\n" + "from (\n" + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" @@ -312,7 +312,7 @@ public JsonArray filesTimeSeries(Dataverse d) { return MetricsUtil.timeSeriesToJson(results); } - + /** * @param yyyymm Month in YYYY-MM format. * @param d @@ -389,7 +389,7 @@ public JsonArray filesByType(Dataverse d) { return jab.build(); } - + public JsonArray filesByTypeTimeSeries(Dataverse d, boolean published) { Query query = em.createNativeQuery("SELECT DISTINCT to_char(" + (published ? "ob.publicationdate" : "ob.createdate") + ",'YYYY-MM') as date, df.contenttype, count(df.id), coalesce(sum(df.filesize),0) " + " FROM DataFile df, DvObject ob" @@ -402,13 +402,13 @@ public JsonArray filesByTypeTimeSeries(Dataverse d, boolean published) { logger.log(Level.FINE, "Metric query: {0}", query); List results = query.getResultList(); return MetricsUtil.timeSeriesByTypeToJson(results); - + } - /** Downloads + /** Downloads * @param d * @throws ParseException */ - + public JsonArray downloadsTimeSeries(Dataverse d) { // ToDo - published only? 
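The download and account metrics in this bean follow its existing convention of splicing the month cutoff into the native SQL as a string literal (the yyyymm value is expected to be validated upstream before it reaches these methods, so this is a style point more than an open injection hole). For reference, the same cutoff can also be written with a bound JPA positional parameter. This is an illustrative fragment only, assuming the bean's em and an already validated yyyymm, with a simplified stand-in for the real statement:

    // Illustrative only: binding the month instead of concatenating it.
    Query query = em.createNativeQuery(
            "select count(*) from guestbookresponse "
          + "where date_trunc('month', responsetime) <= to_date(?1, 'YYYY-MM')");
    query.setParameter(1, yyyymm);
    long total = (long) query.getSingleResult();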
Query earlyDateQuery = em.createNativeQuery("" @@ -432,11 +432,11 @@ public JsonArray downloadsTimeSeries(Dataverse d) { List results = query.getResultList(); return MetricsUtil.timeSeriesToJson(results); } - + /* * This includes getting historic download without a timestamp if query * is earlier than earliest timestamped record - * + * * @param yyyymm Month in YYYY-MM format. */ public long downloadsToMonth(String yyyymm, Dataverse d) throws ParseException { @@ -459,7 +459,7 @@ public long downloadsToMonth(String yyyymm, Dataverse d) throws ParseException { + "where (date_trunc('month', responsetime) <= to_date('" + yyyymm + "','YYYY-MM')" + "or responsetime is NULL)\n" // includes historic guestbook records without date + "and eventtype!='" + GuestbookResponse.ACCESS_REQUEST +"'\n" - + ((d==null) ? ";": "AND dataset_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ");") + + ((d==null) ? ";": "AND dataset_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ");") ); logger.log(Level.FINE, "Metric query: {0}", query); return (long) query.getSingleResult(); @@ -487,7 +487,7 @@ public long downloadsPastDays(int days, Dataverse d) { return (long) query.getSingleResult(); } - + public JsonArray fileDownloadsTimeSeries(Dataverse d, boolean uniqueCounts) { Query query = em.createNativeQuery("select distinct to_char(gb.responsetime, 'YYYY-MM') as date, ob.id, ob.protocol || ':' || ob.authority || '/' || ob.identifier as pid, count(" + (uniqueCounts ? "distinct email" : "*") + ") " + " FROM guestbookresponse gb, DvObject ob" @@ -501,7 +501,7 @@ public JsonArray fileDownloadsTimeSeries(Dataverse d, boolean uniqueCounts) { return MetricsUtil.timeSeriesByIDAndPIDToJson(results); } - + public JsonArray fileDownloads(String yyyymm, Dataverse d, boolean uniqueCounts) { Query query = em.createNativeQuery("select ob.id, ob.protocol || ':' || ob.authority || '/' || ob.identifier as pid, count(" + (uniqueCounts ? "distinct email" : "*") + ") " + " FROM guestbookresponse gb, DvObject ob" @@ -543,7 +543,7 @@ public JsonArray uniqueDownloadsTimeSeries(Dataverse d) { return MetricsUtil.timeSeriesByPIDToJson(results); } - + public JsonArray uniqueDatasetDownloads(String yyyymm, Dataverse d) { //select distinct count(distinct email),dataset_id, date_trunc('month', responsetime) from guestbookresponse group by dataset_id, date_trunc('month',responsetime) order by dataset_id,date_trunc('month',responsetime); @@ -571,10 +571,58 @@ public JsonArray uniqueDatasetDownloads(String yyyymm, Dataverse d) { return jab.build(); } - - //MDC - - + + //Accounts + + /* + * + * @param yyyymm Month in YYYY-MM format. 
+ */ + public long accountsToMonth(String yyyymm) throws ParseException { + Query query = em.createNativeQuery("" + + "select count(authenticateduser.id)\n" + + "from authenticateduser\n" + + "where authenticateduser.createdtime is not null\n" + + "and date_trunc('month', createdtime) <= to_date('" + yyyymm + "','YYYY-MM');" + ); + logger.log(Level.FINE, "Metric query: {0}", query); + + return (long) query.getSingleResult(); + } + + /* + * + * @param days interval since the current date to list + * the number of user accounts created + */ + public long accountsPastDays(int days) { + Query query = em.createNativeQuery("" + + "select count(id)\n" + + "from authenticateduser\n" + + "where authenticateduser.createdtime is not null\n" + + "and authenticateduser.createdtime > current_date - interval '" + days + "' day;" + ); + logger.log(Level.FINE, "Metric query: {0}", query); + + return (long) query.getSingleResult(); + } + + public JsonArray accountsTimeSeries() { + Query query = em.createNativeQuery("" + + "select distinct to_char(au.createdtime, 'YYYY-MM'), count(id)\n" + + "from authenticateduser as au\n" + + "where au.createdtime is not null\n" + + "group by to_char(au.createdtime, 'YYYY-MM')\n" + + "order by to_char(au.createdtime, 'YYYY-MM');"); + + logger.log(Level.FINE, "Metric query: {0}", query); + List results = query.getResultList(); + return MetricsUtil.timeSeriesToJson(results); + } + + //MDC + + public JsonArray mdcMetricTimeSeries(MetricType metricType, String country, Dataverse d) { Query query = em.createNativeQuery("SELECT distinct substring(monthyear from 1 for 7) as date, coalesce(sum(" + metricType.toString() + "),0) as count FROM DatasetMetrics\n" + ((d == null) ? "" : "WHERE dataset_id in ( " + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ")\n") @@ -746,7 +794,7 @@ public Metric getMetric(String name, String dataLocation, String dayString, Data // https://github.com/DANS-KNAW/dataverse/blob/dans-develop/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsDansServiceBean.java /** - * + * * @param dvId - parent dataverse id * @param dtype - type of object to return 'Dataverse' or 'Dataset' * @return - list of objects of specified type included in the subtree (includes parent dataverse if dtype is 'Dataverse') @@ -768,7 +816,7 @@ private String getCommaSeparatedIdStringForSubtree(Dataverse d, String dtype) { } private List getChildrenIdsRecursively(Long dvId, String dtype, DatasetVersion.VersionState versionState) { - + //Intended to be called only with dvId != null String sql = "WITH RECURSIVE querytree AS (\n" + " SELECT id, dtype, owner_id, publicationdate\n" diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java index e9898031343..6c99155d8a4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/DataRetrieverAPI.java @@ -3,6 +3,7 @@ */ package edu.harvard.iq.dataverse.mydata; +import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.DataverseRoleServiceBean; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DataverseSession; @@ -26,10 +27,10 @@ import edu.harvard.iq.dataverse.search.SearchFields; import edu.harvard.iq.dataverse.search.SortBy; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.logging.Logger; -import java.util.Locale; import jakarta.ejb.EJB; import 
jakarta.inject.Inject; import jakarta.json.Json; @@ -39,7 +40,6 @@ import jakarta.ws.rs.Path; import jakarta.ws.rs.Produces; import jakarta.ws.rs.QueryParam; -import jakarta.ws.rs.DefaultValue; import jakarta.ws.rs.container.ContainerRequestContext; import jakarta.ws.rs.core.Context; @@ -64,7 +64,7 @@ public class DataRetrieverAPI extends AbstractApiBean { private static final String retrieveDataPartialAPIPath = "retrieve"; @Inject - DataverseSession session; + DataverseSession session; @EJB DataverseRoleServiceBean dataverseRoleService; @@ -82,6 +82,8 @@ public class DataRetrieverAPI extends AbstractApiBean { //MyDataQueryHelperServiceBean myDataQueryHelperServiceBean; @EJB GroupServiceBean groupService; + @EJB + DatasetServiceBean datasetService; private List roleList; private DataverseRolePermissionHelper rolePermissionHelper; @@ -226,7 +228,12 @@ private SolrQueryResponse getTotalCountsFromSolr(DataverseRequest dataverseReque //SearchFields.RELEASE_OR_CREATE_DATE, SortBy.DESCENDING, 0, //paginationStart, true, // dataRelatedToMe - SearchConstants.NUM_SOLR_DOCS_TO_RETRIEVE //10 // SearchFields.NUM_SOLR_DOCS_TO_RETRIEVE + SearchConstants.NUM_SOLR_DOCS_TO_RETRIEVE, //10 // SearchFields.NUM_SOLR_DOCS_TO_RETRIEVE + true, + null, + null, + false, // no need to request facets here ... + false // ... same for highlights ); } catch (SearchException ex) { logger.severe("Search for total counts failed with filter query"); @@ -270,9 +277,7 @@ public String retrieveMyDataAsJsonString( @QueryParam("dataset_valid") List datasetValidities) { boolean OTHER_USER = false; - String localeCode = session.getLocaleCode(); - String noMsgResultsFound = BundleUtil.getStringFromPropertyFile("dataretrieverAPI.noMsgResultsFound", - "Bundle", new Locale(localeCode)); + String noMsgResultsFound = BundleUtil.getStringFromBundle("dataretrieverAPI.noMsgResultsFound"); if ((session.getUser() != null) && (session.getUser().isAuthenticated())) { authUser = (AuthenticatedUser) session.getUser(); @@ -280,7 +285,10 @@ public String retrieveMyDataAsJsonString( try { authUser = getRequestAuthenticatedUserOrDie(crc); } catch (WrappedResponse e) { - return this.getJSONErrorString("Requires authentication. Please login.", "retrieveMyDataAsJsonString. User not found! 
Shouldn't be using this anyway"); + return this.getJSONErrorString( + BundleUtil.getStringFromBundle("dataretrieverAPI.authentication.required"), + BundleUtil.getStringFromBundle("dataretrieverAPI.authentication.required.opt") + ); } } @@ -293,7 +301,9 @@ public String retrieveMyDataAsJsonString( authUser = searchUser; OTHER_USER = true; } else { - return this.getJSONErrorString("No user found for: \"" + userIdentifier + "\"", null); + return this.getJSONErrorString( + BundleUtil.getStringFromBundle("dataretrieverAPI.user.not.found", Arrays.asList(userIdentifier)), + null); } } @@ -333,8 +343,7 @@ public String retrieveMyDataAsJsonString( myDataFinder = new MyDataFinder(rolePermissionHelper, roleAssigneeService, dvObjectServiceBean, - groupService, - noMsgResultsFound); + groupService); this.myDataFinder.runFindDataSteps(filterParams); if (myDataFinder.hasError()){ return this.getJSONErrorString(myDataFinder.getErrorMessage(), myDataFinder.getErrorMessage()); @@ -389,11 +398,14 @@ public String retrieveMyDataAsJsonString( } catch (SearchException ex) { solrQueryResponse = null; - this.logger.severe("Solr SearchException: " + ex.getMessage()); + logger.severe("Solr SearchException: " + ex.getMessage()); } - if (solrQueryResponse==null){ - return this.getJSONErrorString("Sorry! There was an error with the search service.", "Sorry! There was a SOLR Error"); + if (solrQueryResponse == null) { + return this.getJSONErrorString( + BundleUtil.getStringFromBundle("dataretrieverAPI.solr.error"), + BundleUtil.getStringFromBundle("dataretrieverAPI.solr.error.opt") + ); } // --------------------------------- @@ -487,9 +499,10 @@ private JsonArrayBuilder formatSolrDocs(SolrQueryResponse solrResponse, RoleTagR // ------------------------------------------- // (a) Get core card data from solr // ------------------------------------------- - myDataCardInfo = doc.getJsonForMyData(); - if (!doc.getEntity().isInstanceofDataFile()){ + myDataCardInfo = doc.getJsonForMyData(isValid(doc)); + + if (doc.getEntity() != null && !doc.getEntity().isInstanceofDataFile()){ String parentAlias = dataverseService.getParentAliasString(doc); myDataCardInfo.add("parent_alias",parentAlias); } @@ -510,4 +523,8 @@ private JsonArrayBuilder formatSolrDocs(SolrQueryResponse solrResponse, RoleTagR return jsonSolrDocsArrayBuilder; } + + private boolean isValid(SolrSearchResult result) { + return result.isValid(x -> true); + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java index 2ab248fcc0b..2acb93d37f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFilterParams.java @@ -12,6 +12,7 @@ import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.search.SearchConstants; import edu.harvard.iq.dataverse.search.SearchFields; +import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -178,26 +179,25 @@ public List getRoleIds(){ } - - private void checkParams(){ - - if ((this.userIdentifier == null)||(this.userIdentifier.isEmpty())){ - this.addError("Sorry! 
No user was found!"); + private void checkParams() { + if ((this.userIdentifier == null) || (this.userIdentifier.isEmpty())) { + this.addError(BundleUtil.getStringFromBundle("myDataFilterParams.error.no.user")); return; } - if ((this.roleIds == null)||(this.roleIds.isEmpty())){ - this.addError("No results. Please select at least one Role."); + if ((this.roleIds == null) || (this.roleIds.isEmpty())) { + this.addError(BundleUtil.getStringFromBundle("myDataFilterParams.error.result.no.role")); return; } - if ((this.dvObjectTypes == null)||(this.dvObjectTypes.isEmpty())){ - this.addError("No results. Please select one of Dataverses, Datasets, Files."); + if ((this.dvObjectTypes == null) || (this.dvObjectTypes.isEmpty())) { + this.addError(BundleUtil.getStringFromBundle("myDataFilterParams.error.result.no.dvobject")); return; } - - if ((this.publicationStatuses == null)||(this.publicationStatuses.isEmpty())){ - this.addError("No results. Please select one of " + StringUtils.join(MyDataFilterParams.defaultPublishedStates, ", ") + "."); + + if ((this.publicationStatuses == null) || (this.publicationStatuses.isEmpty())) { + this.addError(BundleUtil.getStringFromBundle("dataretrieverAPI.user.not.found", + Arrays.asList(StringUtils.join(MyDataFilterParams.defaultPublishedStates, ", ")))); return; } } @@ -292,7 +292,7 @@ public String getSolrFragmentForPublicationStatus(){ } public String getSolrFragmentForDatasetValidity(){ - if ((this.datasetValidities == null) || (this.datasetValidities.isEmpty())){ + if ((this.datasetValidities == null) || (this.datasetValidities.isEmpty()) || (this.datasetValidities.size() > 1)){ return ""; } diff --git a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java index 917884f3549..5626a442762 100644 --- a/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java +++ b/src/main/java/edu/harvard/iq/dataverse/mydata/MyDataFinder.java @@ -11,7 +11,9 @@ import edu.harvard.iq.dataverse.authorization.DataverseRolePermissionHelper; import edu.harvard.iq.dataverse.authorization.groups.GroupServiceBean; import edu.harvard.iq.dataverse.search.SearchFields; +import edu.harvard.iq.dataverse.util.BundleUtil; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -47,7 +49,6 @@ public class MyDataFinder { private RoleAssigneeServiceBean roleAssigneeService; private DvObjectServiceBean dvObjectServiceBean; private GroupServiceBean groupService; - private String noMsgResultsFound; //private RoleAssigneeServiceBean roleService = new RoleAssigneeServiceBean(); //private MyDataQueryHelperServiceBean myDataQueryHelperService; // -------------------- @@ -86,12 +87,11 @@ public class MyDataFinder { private List fileGrandparentFileIds = new ArrayList<>(); // dataverse has file permissions - public MyDataFinder(DataverseRolePermissionHelper rolePermissionHelper, RoleAssigneeServiceBean roleAssigneeService, DvObjectServiceBean dvObjectServiceBean, GroupServiceBean groupService, String _noMsgResultsFound) { + public MyDataFinder(DataverseRolePermissionHelper rolePermissionHelper, RoleAssigneeServiceBean roleAssigneeService, DvObjectServiceBean dvObjectServiceBean, GroupServiceBean groupService) { this.rolePermissionHelper = rolePermissionHelper; this.roleAssigneeService = roleAssigneeService; this.dvObjectServiceBean = dvObjectServiceBean; this.groupService = groupService; - this.noMsgResultsFound = _noMsgResultsFound; 
this.loadHarvestedDataverseIds(); } @@ -213,7 +213,7 @@ private List getSolrFilterQueries(boolean totalCountsOnly){ // ----------------------------------------------------------------- String dvObjectFQ = this.getSolrDvObjectFilterQuery(); if (dvObjectFQ ==null){ - this.addErrorMessage(noMsgResultsFound); + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.empty")); return null; } filterQueries.add(dvObjectFQ); @@ -239,7 +239,7 @@ private List getSolrFilterQueries(boolean totalCountsOnly){ //fq=publicationStatus:"Unpublished"&fq=publicationStatus:"Draft" // ----------------------------------------------------------------- - // (4) FQ by dataset metadata vlidity + // (4) FQ by dataset metadata validity // ----------------------------------------------------------------- filterQueries.add(this.filterParams.getSolrFragmentForDatasetValidity()); //fq=datasetValid:(true OR false) @@ -286,7 +286,7 @@ public String getSolrDvObjectFilterQuery(){ if ((distinctEntityIds.isEmpty()) && (distinctParentIds.isEmpty())) { - this.addErrorMessage(noMsgResultsFound); + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.empty")); return null; } @@ -430,24 +430,25 @@ public JsonArrayBuilder getListofSelectedRoles(){ } - private boolean runStep1RoleAssignments(){ + private boolean runStep1RoleAssignments() { List results = this.roleAssigneeService.getAssigneeAndRoleIdListFor(filterParams); //logger.info("runStep1RoleAssignments results: " + results.toString()); - if (results == null){ - this.addErrorMessage("Sorry, the EntityManager isn't working (still)."); + if (results == null) { + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.null")); return false; - }else if (results.isEmpty()){ + } else if (results.isEmpty()) { List roleNames = this.rolePermissionHelper.getRoleNamesByIdList(this.filterParams.getRoleIds()); - if ((roleNames == null)||(roleNames.isEmpty())){ - this.addErrorMessage("Sorry, you have no assigned roles."); - }else{ - if (roleNames.size()==1){ - this.addErrorMessage("Sorry, nothing was found for this role: " + StringUtils.join(roleNames, ", ")); - }else{ - this.addErrorMessage("Sorry, nothing was found for these roles: " + StringUtils.join(roleNames, ", ")); + if ((roleNames == null) || (roleNames.isEmpty())) { + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.no.role")); + } else { + final List args = Arrays.asList(StringUtils.join(roleNames, ", ")); + if (roleNames.size() == 1) { + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.role.empty", args)); + } else { + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.roles.empty", args)); } } return false; @@ -497,7 +498,7 @@ private boolean runStep2DirectAssignments(){ List results = this.dvObjectServiceBean.getDvObjectInfoForMyData(directDvObjectIds); //List results = this.roleAssigneeService.getAssignmentsFor(this.userIdentifier); if (results.isEmpty()){ - this.addErrorMessage("Sorry, you have no assigned Dataverses, Datasets, or Files."); + this.addErrorMessage(BundleUtil.getStringFromBundle("myDataFinder.error.result.no.dvobject")); return false; } diff --git a/src/main/java/edu/harvard/iq/dataverse/openapi/OpenApi.java b/src/main/java/edu/harvard/iq/dataverse/openapi/OpenApi.java new file mode 100644 index 00000000000..6bd54916e0d --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/openapi/OpenApi.java @@ -0,0 +1,101 @@ +package 
edu.harvard.iq.dataverse.openapi; + +import java.io.*; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.logging.*; + +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.servlet.ServletException; +import jakarta.servlet.annotation.WebServlet; +import jakarta.servlet.http.*; +import jakarta.ws.rs.core.*; +import org.apache.commons.io.IOUtils; +import edu.harvard.iq.dataverse.util.BundleUtil; + +@WebServlet("/openapi") +public class OpenApi extends HttpServlet { + + private static final Logger logger = Logger.getLogger(OpenApi.class.getCanonicalName()); + + private static final String YAML_FORMAT = "yaml"; + private static final String JSON_FORMAT = "json"; + + + @Override + protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { + + + String format = req.getParameter("format"); + String accept = req.getHeader("Accept"); + + /* + * We check the Accept header first: if the request accepts application/json, + * we also have to check the format parameter; if that parameter is present + * and different from json, we return BAD_REQUEST (400). + */ + if (MediaType.APPLICATION_JSON.equals(accept)){ + if (format != null && !JSON_FORMAT.equals(format)){ + List<String> args = Arrays.asList(accept, format); + String bundleResponse = BundleUtil.getStringFromBundle("openapi.exception.unaligned", args); + resp.sendError(Response.Status.BAD_REQUEST.getStatusCode(), + bundleResponse); + return; + } else { + format = JSON_FORMAT; + } + } + + /* + * We currently support only JSON and YAML, with YAML as the default when no + * format is specified. If any other format is requested, we return + * UNSUPPORTED_MEDIA_TYPE (415) to indicate that the format is not supported. + */ + + format = format == null ? YAML_FORMAT : format.toLowerCase(); + + if (JSON_FORMAT.equals(format)) { + resp.setContentType(MediaType.APPLICATION_JSON_TYPE.toString()); + } else if (YAML_FORMAT.equals(format)){ + resp.setContentType(MediaType.TEXT_PLAIN_TYPE.toString()); + } else { + + List<String> args = Arrays.asList(format); + String bundleResponse = BundleUtil.getStringFromBundle("openapi.exception.invalid.format", args); + + JsonObject errorResponse = Json.createObjectBuilder() + .add("status", "ERROR") + .add("code", HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE) + .add("message", bundleResponse) + .build(); + + resp.setContentType(MediaType.APPLICATION_JSON_TYPE.toString()); + resp.setStatus(HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE); + + PrintWriter responseWriter = resp.getWriter(); + responseWriter.println(errorResponse.toString()); + responseWriter.flush(); + return; + } + + try { + String baseFileName = "/META-INF/openapi."
+ format; + ClassLoader classLoader = this.getClass().getClassLoader(); + URL openApiDefinitionResource = classLoader.getResource(baseFileName); + InputStream openapiDefinitionStream = openApiDefinitionResource.openStream(); + String content = IOUtils.toString(openapiDefinitionStream, StandardCharsets.UTF_8); + resp.getWriter().write(content); + } catch (Exception e) { + logger.log(Level.SEVERE, "OpenAPI definition file not found for format " + format + ": " + e.getMessage(), e); + String bundleResponse = BundleUtil.getStringFromBundle("openapi.exception"); + resp.sendError(Response.Status.INTERNAL_SERVER_ERROR.getStatusCode(), + bundleResponse); + } + + + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java new file mode 100644 index 00000000000..f6d142aac96 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java @@ -0,0 +1,556 @@ +package edu.harvard.iq.dataverse.pidproviders; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.util.SystemConfig; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; + +import java.util.*; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.lang3.RandomStringUtils; +import com.beust.jcommander.Strings; + +public abstract class AbstractPidProvider implements PidProvider { + + private static final Logger logger = Logger.getLogger(AbstractPidProvider.class.getCanonicalName()); + + public static String UNAVAILABLE = ":unav"; + public static final String SEPARATOR = "/"; + + protected PidProviderFactoryBean pidProviderService; + + private String protocol; + + private String authority = null; + + private String shoulder = null; + + private String identifierGenerationStyle = null; + + private String datafilePidFormat = null; + + private HashSet<String> managedSet; + + private HashSet<String> excludedSet; + + private String id; + private String label; + + protected AbstractPidProvider(String id, String label, String protocol) { + this.id = id; + this.label = label; + this.protocol = protocol; + this.managedSet = new HashSet<>(); + this.excludedSet = new HashSet<>(); + } + + protected AbstractPidProvider(String id, String label, String protocol, String authority, String shoulder, + String identifierGenerationStyle, String datafilePidFormat, String managedList, String excludedList) { + this.id = id; + this.label = label; + this.protocol = protocol; + this.authority = authority; + this.shoulder = shoulder; + this.identifierGenerationStyle = identifierGenerationStyle; + this.datafilePidFormat = datafilePidFormat; + this.managedSet = new HashSet<>(Arrays.asList(managedList.split(",\\s"))); + this.excludedSet = new HashSet<>(Arrays.asList(excludedList.split(",\\s"))); + if (logger.isLoggable(Level.FINE)) { + Iterator<String> iter = managedSet.iterator(); + while (iter.hasNext()) { + logger.fine("managedSet in " + getId() + ": " + iter.next()); + } + iter = excludedSet.iterator(); + while (iter.hasNext()) { + logger.fine("excludedSet in " + getId() + ": " + iter.next()); + } + } + } + + @Override + public Map<String, String> getMetadataForCreateIndicator(DvObject dvObjectIn) { + logger.log(Level.FINE, "getMetadataForCreateIndicator(DvObject)"); + Map<String, String> metadata = new HashMap<>(); + metadata =
addBasicMetadata(dvObjectIn, metadata); + metadata.put("datacite.publicationyear", generateYear(dvObjectIn)); + metadata.put("_target", getTargetUrl(dvObjectIn)); + return metadata; + } + + protected Map<String, String> getUpdateMetadata(DvObject dvObjectIn) { + logger.log(Level.FINE, "getUpdateMetadataFromDataset"); + Map<String, String> metadata = new HashMap<>(); + metadata = addBasicMetadata(dvObjectIn, metadata); + return metadata; + } + + protected Map<String, String> addBasicMetadata(DvObject dvObjectIn, Map<String, String> metadata) { + + String authorString = dvObjectIn.getAuthorString(); + if (authorString.isEmpty() || authorString.contains(DatasetField.NA_VALUE)) { + authorString = UNAVAILABLE; + } + + String producerString = pidProviderService.getProducer(); + + if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { + producerString = UNAVAILABLE; + } + + String titleString = dvObjectIn.getCurrentName(); + + if (titleString.isEmpty() || titleString.equals(DatasetField.NA_VALUE)) { + titleString = UNAVAILABLE; + } + + metadata.put("datacite.creator", authorString); + metadata.put("datacite.title", titleString); + metadata.put("datacite.publisher", producerString); + metadata.put("datacite.publicationyear", generateYear(dvObjectIn)); + return metadata; + } + + protected Map<String, String> addDOIMetadataForDestroyedDataset(DvObject dvObjectIn) { + Map<String, String> metadata = new HashMap<>(); + String authorString = UNAVAILABLE; + String producerString = UNAVAILABLE; + String titleString = "This item has been removed from publication"; + + metadata.put("datacite.creator", authorString); + metadata.put("datacite.title", titleString); + metadata.put("datacite.publisher", producerString); + metadata.put("datacite.publicationyear", "9999"); + return metadata; + } + + protected String getTargetUrl(DvObject dvObjectIn) { + logger.log(Level.FINE, "getTargetUrl"); + return SystemConfig.getDataverseSiteUrlStatic() + dvObjectIn.getTargetUrl() + + dvObjectIn.getGlobalId().asString(); + } + + @Override + public String getIdentifier(DvObject dvObject) { + GlobalId gid = dvObject.getGlobalId(); + return gid != null ? gid.asString() : null; + } + + protected String generateYear(DvObject dvObjectIn) { + return dvObjectIn.getYearPublishedCreated(); + } + + public Map<String, String> getMetadataForTargetURL(DvObject dvObject) { + logger.log(Level.FINE, "getMetadataForTargetURL"); + HashMap<String, String> metadata = new HashMap<>(); + metadata.put("_target", getTargetUrl(dvObject)); + return metadata; + } + + @Override + public boolean alreadyRegistered(DvObject dvo) throws Exception { + if (dvo == null) { + logger.severe("Null DvObject sent to alreadyRegistered()."); + return false; + } + GlobalId globalId = dvo.getGlobalId(); + if (globalId == null) { + return false; + } + return alreadyRegistered(globalId, false); + } + + public abstract boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) throws Exception; + + /* + * ToDo: the DvObject being sent in provides partial support for the case where + * it has a different authority/protocol than what is configured (i.e. a legacy + * Pid that can actually be updated by the Pid account being used.) Removing + * this now would potentially break/make it harder to handle that case prior to + * support for configuring multiple Pid providers. Once that exists, it would be + * cleaner to always find the PidProvider associated with the + * protocol/authority/shoulder of the current dataset and then not pass the + * DvObject as a param. (This would also remove calls to get the settings since + * that would be done at construction.)
+ */ + @Override + public DvObject generatePid(DvObject dvObject) { + + if (dvObject.getProtocol() == null) { + dvObject.setProtocol(getProtocol()); + } else { + if (!dvObject.getProtocol().equals(getProtocol())) { + logger.warning("The protocol of the DvObject (" + dvObject.getProtocol() + + ") does not match the configured protocol (" + getProtocol() + ")"); + throw new IllegalArgumentException("The protocol of the DvObject (" + dvObject.getProtocol() + + ") doesn't match that of the provider, id: " + getId()); + } + } + if (dvObject.getAuthority() == null) { + dvObject.setAuthority(getAuthority()); + } else { + if (!dvObject.getAuthority().equals(getAuthority())) { + logger.warning("The authority of the DvObject (" + dvObject.getAuthority() + + ") does not match the configured authority (" + getAuthority() + ")"); + throw new IllegalArgumentException("The authority of the DvObject (" + dvObject.getAuthority() + + ") doesn't match that of the provider, id: " + getId()); + } + } + if (dvObject.isInstanceofDataset()) { + dvObject.setIdentifier(generateDatasetIdentifier((Dataset) dvObject)); + } else { + dvObject.setIdentifier(generateDataFileIdentifier((DataFile) dvObject)); + } + return dvObject; + } + + private String generateDatasetIdentifier(Dataset dataset) { + String shoulder = getShoulder(); + + switch (getIdentifierGenerationStyle()) { + case "randomString": + return generateIdentifierAsRandomString(dataset, shoulder); + case "storedProcGenerated": + return generateIdentifierFromStoredProcedureIndependent(dataset, shoulder); + default: + /* Should we throw an exception instead?? -- L.A. 4.6.2 */ + return generateIdentifierAsRandomString(dataset, shoulder); + } + } + + /** + * Check that an identifier entered by the user is unique (not currently used + * for any other study in this Dataverse Network); also check for a duplicate + * in the remote PID service if needed. + * + * @param globalId the global id to check + * @return {@code true} if the identifier is unique, {@code false} otherwise. + */ + public boolean isGlobalIdUnique(GlobalId globalId) { + if (!pidProviderService.isGlobalIdLocallyUnique(globalId)) { + return false; // duplication found in local database + } + + // not in local DB, look in the persistent identifier service + try { + return !alreadyRegistered(globalId, false); + } catch (Exception e) { + // we can live with failure - means identifier not found remotely + } + + return true; + } + + /** + * Parse a Persistent Id and set the protocol, authority, and identifier + * + * Example 1: doi:10.5072/FK2/BYM3IW protocol: doi authority: 10.5072 + * identifier: FK2/BYM3IW + * + * Example 2: hdl:1902.1/111012 protocol: hdl authority: 1902.1 identifier: + * 111012 + * + * @param fullIdentifierString the full identifier, e.g. doi:10.5072/FK2/BYM3IW + * @return a new {@code GlobalId} holding the parsed protocol, authority, and + * identifier, or {@code null} if parsing failed.
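+ * + * Note: a doubly URL-encoded protocol separator is tolerated, so an input like + * doi%3A10.5072/FK2/BYM3IW is decoded back to doi:10.5072/FK2/BYM3IW before the + * protocol is split off.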
+ */ + @Override + public GlobalId parsePersistentId(String fullIdentifierString) { + // Occasionally, the protocol separator character ':' comes in still + // URL-encoded as %3A (usually as a result of the URL having been + // encoded twice): + fullIdentifierString = fullIdentifierString.replace("%3A", ":"); + + int index1 = fullIdentifierString.indexOf(':'); + if (index1 > 0) { // ':' found with one or more characters before it + String protocol = fullIdentifierString.substring(0, index1); + GlobalId globalId = parsePersistentId(protocol, fullIdentifierString.substring(index1 + 1)); + return globalId; + } + logger.log(Level.INFO, "Error parsing identifier: {0}: '':'' not found in string", + fullIdentifierString); + return null; + } + + protected GlobalId parsePersistentId(String protocol, String identifierString) { + String authority; + String identifier; + if (identifierString == null) { + return null; + } + int index = identifierString.indexOf(getSeparator()); + if (index > 0 && (index + 1) < identifierString.length()) { + // '/' found with one or more characters + // before and after it + // Strip any whitespace, ; and ' from authority (should finding them cause a + // failure instead?) + authority = PidProvider.formatIdentifierString(identifierString.substring(0, index)); + + if (PidProvider.testforNullTerminator(authority)) { + return null; + } + identifier = PidProvider.formatIdentifierString(identifierString.substring(index + 1)); + if (PidProvider.testforNullTerminator(identifier)) { + return null; + } + + } else { + logger.log(Level.INFO, "Error parsing identifier: {0}: '':/'' not found in string", + identifierString); + return null; + } + return parsePersistentId(protocol, authority, identifier); + } + + public GlobalId parsePersistentId(String protocol, String authority, String identifier) { + logger.fine("Parsing: " + protocol + ":" + authority + getSeparator() + identifier + " in " + getId()); + if (!PidProvider.isValidGlobalId(protocol, authority, identifier)) { + return null; + } + // Check authority/identifier if this is a provider that manages specific + // identifiers (i.e., is not one of the unmanaged providers that has a null + // authority) + if (getAuthority() != null) { + + String cleanIdentifier = protocol + ":" + authority + getSeparator() + identifier; + /* + * Test if this provider manages this identifier - return null if it does not. + * It does match if ((the identifier's authority and shoulder match the + * provider's), or the identifier is in the managed set), and, in either case, + * the identifier is not in the excluded set.
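+ * For example, a provider configured with authority 10.5072 and shoulder FK2 + * (illustrative values) would match doi:10.5072/FK2/ABC123, but would match + * doi:10.5072/XYZ only if that PID had been added to its managed set.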
+ */ + logger.fine("clean pid in " + getId() + ": " + cleanIdentifier); + logger.fine("managed in " + getId() + ": " + getManagedSet().contains(cleanIdentifier)); + logger.fine("excluded from " + getId() + ": " + getExcludedSet().contains(cleanIdentifier)); + + if (!(((authority.equals(getAuthority()) && identifier.startsWith(getShoulder())) + || getManagedSet().contains(cleanIdentifier)) && !getExcludedSet().contains(cleanIdentifier))) { + return null; + } + } + return new GlobalId(protocol, authority, identifier, getSeparator(), getUrlPrefix(), getId()); + } + + public String getSeparator() { + // The standard default + return SEPARATOR; + } + + private String generateDataFileIdentifier(DataFile datafile) { + String doiDataFileFormat = getDatafilePidFormat(); + + String prepend = ""; + if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.DEPENDENT.toString())) { + // If format is dependent then pre-pend the dataset identifier + prepend = datafile.getOwner().getIdentifier() + SEPARATOR; + datafile.setProtocol(datafile.getOwner().getProtocol()); + datafile.setAuthority(datafile.getOwner().getAuthority()); + } else { + // If there's a shoulder prepend independent identifiers with it + prepend = getShoulder(); + datafile.setProtocol(getProtocol()); + datafile.setAuthority(getAuthority()); + } + + switch (getIdentifierGenerationStyle()) { + case "randomString": + return generateIdentifierAsRandomString(datafile, prepend); + case "storedProcGenerated": + if (doiDataFileFormat.equals(SystemConfig.DataFilePIDFormat.INDEPENDENT.toString())) { + return generateIdentifierFromStoredProcedureIndependent(datafile, prepend); + } else { + return generateIdentifierFromStoredProcedureDependent(datafile, prepend); + } + default: + /* Should we throw an exception instead?? -- L.A. 4.6.2 */ + return generateIdentifierAsRandomString(datafile, prepend); + } + } + + /* + * This method checks locally for a DvObject with the same PID and if that is + * OK, checks with the PID service. + * + * @param dvo - the object to check (ToDo - get protocol/authority from this + * PidProvider object) + * + * @param prepend - for Datasets, this is always the shoulder, for DataFiles, it + * could be the shoulder or the parent Dataset identifier + */ + private String generateIdentifierAsRandomString(DvObject dvo, String prepend) { + String identifier = null; + do { + identifier = prepend + RandomStringUtils.randomAlphanumeric(6).toUpperCase(); + } while (!isGlobalIdUnique(new GlobalId(dvo.getProtocol(), dvo.getAuthority(), identifier, this.getSeparator(), + this.getUrlPrefix(), this.getId()))); + + return identifier; + } + + /* + * This method checks locally for a DvObject with the same PID and if that is + * OK, checks with the PID service. + * + * @param dvo - the object to check (ToDo - get protocol/authority from this + * PidProvider object) + * + * @param prepend - for Datasets, this is always the shoulder, for DataFiles, it + * could be the shoulder or the parent Dataset identifier + */ + + private String generateIdentifierFromStoredProcedureIndependent(DvObject dvo, String prepend) { + String identifier; + do { + String identifierFromStoredProcedure = pidProviderService.generateNewIdentifierByStoredProcedure(); + // some diagnostics here maybe - is it possible to determine that it's failing + // because the stored procedure hasn't been created in the database? 
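+ // A null value here means the stored procedure did not produce an identifier, + // so we return null rather than retry in an endless loop.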
+ if (identifierFromStoredProcedure == null) { + return null; + } + identifier = prepend + identifierFromStoredProcedure; + } while (!isGlobalIdUnique(new GlobalId(dvo.getProtocol(), dvo.getAuthority(), identifier, this.getSeparator(), + this.getUrlPrefix(), this.getId()))); + + return identifier; + } + + /* + * This method is only used for DataFiles with DEPENDENT Pids. It is not for + * Datasets + * + */ + private String generateIdentifierFromStoredProcedureDependent(DataFile datafile, String prepend) { + String identifier; + Long retVal = 0L; + // ToDo - replace loops with one lookup for largest entry? (the do loop runs + // ~n**2/2 calls). The check for existingIdentifiers means this is mostly a + // local loop now, versus involving db or PidProvider calls, but still...) + + // This will catch identifiers already assigned in the current transaction (e.g. + // in FinalizeDatasetPublicationCommand) that haven't been committed to the db + // without having to make a call to the PIDProvider + Set<String> existingIdentifiers = new HashSet<>(); + List<DataFile> files = datafile.getOwner().getFiles(); + for (DataFile f : files) { + existingIdentifiers.add(f.getIdentifier()); + } + + do { + retVal++; + identifier = prepend + retVal.toString(); + + } while (existingIdentifiers.contains(identifier) || !isGlobalIdUnique(new GlobalId(datafile.getProtocol(), + datafile.getAuthority(), identifier, this.getSeparator(), this.getUrlPrefix(), this.getId()))); + + return identifier; + } + + + @Override + public boolean canManagePID() { + // The default expectation is that PID providers are configured to manage some + // set (i.e. based on protocol/authority/shoulder) of PIDs + return true; + } + + @Override + public void setPidProviderServiceBean(PidProviderFactoryBean pidProviderServiceBean) { + this.pidProviderService = pidProviderServiceBean; + } + + @Override + public String getProtocol() { + return protocol; + } + + @Override + public String getAuthority() { + return authority; + } + + @Override + public String getShoulder() { + return shoulder; + } + + @Override + public String getIdentifierGenerationStyle() { + return identifierGenerationStyle; + } + + @Override + public String getDatafilePidFormat() { + return datafilePidFormat; + } + + @Override + public Set<String> getManagedSet() { + return managedSet; + } + + @Override + public Set<String> getExcludedSet() { + return excludedSet; + } + + @Override + public String getId() { + return id; + } + + @Override + public String getLabel() { + return label; + } + + @Override + /** + * True if this provider can manage PIDs in general, this pid is not in the + * managedSet (meaning it is managed individually even though the provider does + * not generally manage its protocol/authority/separator/shoulder), and either + * this provider is the same as the pid's or we're allowed to create INDEPENDENT + * pids. The latter clause covers the potential case where the effective pid + * provider/generator for the dataset is set to a different one that handles the + * dataset's pid itself. In this case, we can create file PIDs if they are independent.
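+ * For example (illustrative case), a doi provider can mint new file PIDs under + * a dataset PID it manages, or under another provider's dataset when its + * identifierGenerationStyle is INDEPENDENT.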
+ * + * @param pid - the related pid to check + * @return true if this provider can manage PIDs like the one supplied + */ + public boolean canCreatePidsLike(GlobalId pid) { + return canManagePID() && !managedSet.contains(pid.asString()) + && (getIdentifierGenerationStyle().equals("INDEPENDENT") || getId().equals(pid.getProviderId())); + } + + @Override + public JsonObject getProviderSpecification() { + JsonObjectBuilder providerSpecification = Json.createObjectBuilder(); + providerSpecification.add("id", id); + providerSpecification.add("label", label); + providerSpecification.add("protocol", protocol); + providerSpecification.add("authority", authority); + providerSpecification.add("separator", getSeparator()); + providerSpecification.add("shoulder", shoulder); + providerSpecification.add("identifierGenerationStyle", identifierGenerationStyle); + providerSpecification.add("datafilePidFormat", datafilePidFormat); + providerSpecification.add("managedSet", Strings.join(",", managedSet.toArray())); + providerSpecification.add("excludedSet", Strings.join(",", excludedSet.toArray())); + return providerSpecification.build(); + } + + @Override + public boolean updateIdentifier(DvObject dvObject) { + //By default, these are the same + return publicizeIdentifier(dvObject); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PermaLinkPidProviderServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PermaLinkPidProviderServiceBean.java deleted file mode 100644 index d145a7ec106..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PermaLinkPidProviderServiceBean.java +++ /dev/null @@ -1,160 +0,0 @@ -package edu.harvard.iq.dataverse.pidproviders; - -import edu.harvard.iq.dataverse.AbstractGlobalIdServiceBean; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; -import edu.harvard.iq.dataverse.settings.JvmSettings; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; -import edu.harvard.iq.dataverse.util.SystemConfig; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.logging.Logger; - -import jakarta.annotation.PostConstruct; -import jakarta.ejb.Stateless; - -/** - * PermaLink provider - * This is a minimalist permanent ID provider intended for use with 'real' datasets/files where the use case none-the-less doesn't lend itself to the use of DOIs or Handles, e.g. - * * due to cost - * * for a catalog/archive where Dataverse has a dataset representing a dataset with DOI/handle stored elsewhere - * - * The initial implementation will mint identifiers locally and will provide the existing page URLs (using the ?persistentID= format). - * This will be overridable by a configurable parameter to support use of an external resolver. 
- * - */ -@Stateless -public class PermaLinkPidProviderServiceBean extends AbstractGlobalIdServiceBean { - - private static final Logger logger = Logger.getLogger(PermaLinkPidProviderServiceBean.class.getCanonicalName()); - - public static final String PERMA_PROTOCOL = "perma"; - public static final String PERMA_PROVIDER_NAME = "PERMA"; - - //ToDo - handle dataset/file defaults for local system - public static final String PERMA_RESOLVER_URL = JvmSettings.PERMALINK_BASEURL - .lookupOptional() - .orElse(SystemConfig.getDataverseSiteUrlStatic()); - - String authority = null; - private String separator = ""; - - @PostConstruct - private void init() { - if(PERMA_PROTOCOL.equals(settingsService.getValueForKey(Key.Protocol))){ - authority = settingsService.getValueForKey(Key.Authority); - configured=true; - }; - - } - - - //Only used in PidUtilTest - haven't figured out how to mock a PostConstruct call directly - // ToDo - remove after work to allow more than one Pid Provider which is expected to not use stateless beans - public void reInit() { - init(); - } - - @Override - public String getSeparator() { - //The perma default - return separator; - } - - @Override - public boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) { - // Perma doesn't manage registration, so we assume all local PIDs can be treated - // as registered - boolean existsLocally = !dvObjectService.isGlobalIdLocallyUnique(globalId); - return existsLocally ? existsLocally : noProviderDefault; - } - - @Override - public boolean registerWhenPublished() { - return false; - } - - @Override - public List getProviderInformation() { - return List.of(PERMA_PROVIDER_NAME, PERMA_RESOLVER_URL); - } - - @Override - public String createIdentifier(DvObject dvo) throws Throwable { - //Call external resolver and send landing URL? - //FWIW: Return value appears to only be used in RegisterDvObjectCommand where success requires finding the dvo identifier in this string. (Also logged a couple places). - return(dvo.getGlobalId().asString()); - } - - @Override - public Map getIdentifierMetadata(DvObject dvo) { - Map map = new HashMap<>(); - return map; - } - - @Override - public String modifyIdentifierTargetURL(DvObject dvo) throws Exception { - return getTargetUrl(dvo); - } - - @Override - public void deleteIdentifier(DvObject dvo) throws Exception { - // no-op - } - - @Override - public boolean publicizeIdentifier(DvObject dvObject) { - //Generate if needed (i.e. datafile case where we don't create/register early (even with reigsterWhenPublished == false)) - if(dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty() ){ - dvObject = generateIdentifier(dvObject); - } - //Call external resolver and send landing URL? 
- return true; - } - - @Override - public GlobalId parsePersistentId(String pidString) { - //ToDo - handle local PID resolver for dataset/file - if (pidString.startsWith(getUrlPrefix())) { - pidString = pidString.replace(getUrlPrefix(), - (PERMA_PROTOCOL + ":")); - } - return super.parsePersistentId(pidString); - } - - @Override - public GlobalId parsePersistentId(String protocol, String identifierString) { - logger.fine("Checking Perma: " + identifierString); - if (!PERMA_PROTOCOL.equals(protocol)) { - return null; - } - String identifier = null; - if (authority != null) { - if (identifierString.startsWith(authority)) { - identifier = identifierString.substring(authority.length()); - } - } - identifier = GlobalIdServiceBean.formatIdentifierString(identifier); - if (GlobalIdServiceBean.testforNullTerminator(identifier)) { - return null; - } - return new GlobalId(PERMA_PROTOCOL, authority, identifier, separator, getUrlPrefix(), PERMA_PROVIDER_NAME); - } - - @Override - public GlobalId parsePersistentId(String protocol, String authority, String identifier) { - if (!PERMA_PROTOCOL.equals(protocol)) { - return null; - } - return super.parsePersistentId(protocol, authority, identifier); - } - - @Override - public String getUrlPrefix() { - - return PERMA_RESOLVER_URL + "/citation?persistentId=" + PERMA_PROTOCOL + ":"; - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidHelper.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidHelper.java deleted file mode 100644 index 5bc855a9593..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidHelper.java +++ /dev/null @@ -1,43 +0,0 @@ -package edu.harvard.iq.dataverse.pidproviders; - -import java.util.Arrays; -import jakarta.annotation.PostConstruct; -import jakarta.ejb.EJB; -import jakarta.ejb.Singleton; -import jakarta.ejb.Startup; - -import edu.harvard.iq.dataverse.DOIDataCiteServiceBean; -import edu.harvard.iq.dataverse.DOIEZIdServiceBean; -import edu.harvard.iq.dataverse.HandlenetServiceBean; - - /** - * This is a small helper bean - * As it is a singleton and built at application start (=deployment), it will inject the (stateless) - * dataverse service into the BrandingUtil once it's ready. 
- */ - @Startup - @Singleton - public class PidHelper { - - @EJB - DOIDataCiteServiceBean datacitePidSvc; - @EJB - DOIEZIdServiceBean ezidPidSvc; - @EJB - HandlenetServiceBean handlePidSvc; - @EJB - FakePidProviderServiceBean fakePidSvc; - @EJB - PermaLinkPidProviderServiceBean permaPidSvc; - @EJB - UnmanagedDOIServiceBean unmanagedDOISvc; - @EJB - UnmanagedHandlenetServiceBean unmanagedHandleSvc; - - @PostConstruct - public void listServices() { - PidUtil.addAllToProviderList(Arrays.asList(datacitePidSvc, ezidPidSvc, handlePidSvc, permaPidSvc, fakePidSvc)); - PidUtil.addAllToUnmanagedProviderList(Arrays.asList(unmanagedDOISvc, unmanagedHandleSvc)); - } - - } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProvider.java similarity index 56% rename from src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProvider.java index aebf13778c3..194a51eeae0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProvider.java @@ -1,19 +1,16 @@ -package edu.harvard.iq.dataverse; +package edu.harvard.iq.dataverse.pidproviders; -import static edu.harvard.iq.dataverse.GlobalIdServiceBean.logger; -import edu.harvard.iq.dataverse.engine.command.CommandContext; -import edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; -import edu.harvard.iq.dataverse.pidproviders.PidUtil; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import jakarta.json.JsonObject; +import jakarta.json.JsonValue; import java.util.*; -import java.util.function.Function; -import java.util.logging.Level; import java.util.logging.Logger; -public interface GlobalIdServiceBean { +public interface PidProvider { - static final Logger logger = Logger.getLogger(GlobalIdServiceBean.class.getCanonicalName()); + static final Logger logger = Logger.getLogger(PidProvider.class.getCanonicalName()); boolean alreadyRegistered(DvObject dvo) throws Exception; @@ -36,7 +33,6 @@ public interface GlobalIdServiceBean { boolean registerWhenPublished(); boolean canManagePID(); - boolean isConfigured(); List getProviderInformation(); @@ -52,36 +48,26 @@ public interface GlobalIdServiceBean { Map getMetadataForTargetURL(DvObject dvObject); - DvObject generateIdentifier(DvObject dvObject); + DvObject generatePid(DvObject dvObject); String getIdentifier(DvObject dvObject); boolean publicizeIdentifier(DvObject studyIn); - String generateDatasetIdentifier(Dataset dataset); - String generateDataFileIdentifier(DataFile datafile); + boolean updateIdentifier(DvObject dvObject); + boolean isGlobalIdUnique(GlobalId globalId); String getUrlPrefix(); String getSeparator(); - static GlobalIdServiceBean getBean(String protocol, CommandContext ctxt) { - final Function protocolHandler = BeanDispatcher.DISPATCHER.get(protocol); - if ( protocolHandler != null ) { - GlobalIdServiceBean theBean = protocolHandler.apply(ctxt); - if(theBean != null && theBean.isConfigured()) { - logger.fine("getBean returns " + theBean.getProviderInformation().get(0) + " for protocol " + protocol); - } - return theBean; - } else { - logger.log(Level.SEVERE, "Unknown protocol: {0}", protocol); - return null; - } - } - - static GlobalIdServiceBean getBean(CommandContext ctxt) { - return 
getBean(ctxt.settings().getValueForKey(Key.Protocol, ""), ctxt); - } + String getProtocol(); + String getProviderType(); + String getId(); + String getLabel(); + String getAuthority(); + String getShoulder(); + String getIdentifierGenerationStyle(); public static Optional<GlobalId> parse(String identifierString) { try { @@ -111,6 +97,7 @@ public static Optional<GlobalId> parse(String identifierString) { * {@code null} if parsing failed. */ public GlobalId parsePersistentId(String identifierString); + + public GlobalId parsePersistentId(String protocol, String authority, String identifier); @@ -119,16 +106,16 @@ public static boolean isValidGlobalId(String protocol, String authority, String if (protocol == null || authority == null || identifier == null) { return false; } - if(!authority.equals(GlobalIdServiceBean.formatIdentifierString(authority))) { + if(!authority.equals(PidProvider.formatIdentifierString(authority))) { return false; } - if (GlobalIdServiceBean.testforNullTerminator(authority)) { + if (PidProvider.testforNullTerminator(authority)) { return false; } - if(!identifier.equals(GlobalIdServiceBean.formatIdentifierString(identifier))) { + if(!identifier.equals(PidProvider.formatIdentifierString(identifier))) { return false; } - if (GlobalIdServiceBean.testforNullTerminator(identifier)) { + if (PidProvider.testforNullTerminator(identifier)) { return false; } return true; @@ -177,40 +164,28 @@ static boolean checkDOIAuthority(String doiAuthority){ return true; } -} - - -/* - * ToDo - replace this with a mechanism like BrandingUtilHelper that would read - * the config and create PidProviders, one per set of config values and serve - * those as needed. The help has to be a bean to autostart and to hand the - * required service beans to the PidProviders. That may boil down to just the - * dvObjectService (to check for local identifier conflicts) since it will be - * the helper that has to read settings/get systewmConfig values. - * - */ - -/** - * Static utility class for dispatching implementing beans, based on protocol and providers. - * @author michael - */ -class BeanDispatcher { - static final Map<String, Function<CommandContext, GlobalIdServiceBean>> DISPATCHER = new HashMap<>(); - - static { - DISPATCHER.put("hdl", ctxt->ctxt.handleNet() ); - DISPATCHER.put("doi", ctxt->{ - String doiProvider = ctxt.settings().getValueForKey(Key.DoiProvider, ""); - switch ( doiProvider ) { - case "EZID": return ctxt.doiEZId(); - case "DataCite": return ctxt.doiDataCite(); - case "FAKE": return ctxt.fakePidProvider(); - default: - logger.log(Level.SEVERE, "Unknown doiProvider: {0}", doiProvider); - return null; - } - }); - - DISPATCHER.put(PermaLinkPidProviderServiceBean.PERMA_PROTOCOL, ctxt->ctxt.permaLinkProvider() ); - } + + public void setPidProviderServiceBean(PidProviderFactoryBean pidProviderFactoryBean); + + String getDatafilePidFormat(); + + Set<String> getManagedSet(); + + Set<String> getExcludedSet(); + + /** + * Whether related pids can be created by this pid provider + * @see edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider#canCreatePidsLike(GlobalId) more details in the abstract implementation + * + * @param pid + * @return - whether related pids can be created by this pid provider. + */ + boolean canCreatePidsLike(GlobalId pid); + + /** + * Returns a JSON representation of this pid provider including its id, label, protocol, authority, separator, and shoulder.
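+ * A sketch of the shape (values illustrative): {"id": "datacite1", "label": + * "DataCite", "protocol": "doi", "authority": "10.5072", "separator": "/", + * "shoulder": "FK2", ...}.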
+ * @return + */ + public JsonObject getProviderSpecification(); + } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactory.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactory.java new file mode 100644 index 00000000000..f7c1a4b9174 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactory.java @@ -0,0 +1,8 @@ +package edu.harvard.iq.dataverse.pidproviders; + +public interface PidProviderFactory { + + String getType(); + + PidProvider createPidProvider(String id); +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java new file mode 100644 index 00000000000..40044408c63 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java @@ -0,0 +1,250 @@ +package edu.harvard.iq.dataverse.pidproviders; + +import java.io.IOException; +import java.net.URL; +import java.net.URLClassLoader; +import java.nio.file.DirectoryStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Optional; +import java.util.ServiceLoader; +import java.util.logging.Level; +import java.util.logging.Logger; + +import jakarta.annotation.PostConstruct; +import jakarta.ejb.EJB; +import jakarta.ejb.Singleton; +import jakarta.ejb.Startup; +import jakarta.inject.Inject; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.DvObjectServiceBean; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.doi.UnmanagedDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DataCiteDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.ezid.EZIdDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.fake.FakeDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.UnmanagedHandlePidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.UnmanagedPermaLinkPidProvider; + +/** + * This Bean loads all of the PidProviderFactory types available (e.g. EZID, + * DataCite, Handle, PermaLink) and then reads the configuration to load + * particular PidProviders (e.g. a DataCite provider with a specific + * authority/shoulder, username/password, etc.) 
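+ * + * A minimal external factory would look roughly like the sketch below + * (MyPidProvider is a hypothetical PidProvider implementation; the factory is + * discovered via a standard ServiceLoader + * META-INF/services/edu.harvard.iq.dataverse.pidproviders.PidProviderFactory + * entry in its jar): + * + * public class MyPidProviderFactory implements PidProviderFactory { + * public String getType() { return "myproviders"; } + * public PidProvider createPidProvider(String id) { return new MyPidProvider(id); } + * }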
+ */ +@Startup +@Singleton +public class PidProviderFactoryBean { + + private static final Logger logger = Logger.getLogger(PidProviderFactoryBean.class.getCanonicalName()); + + @Inject + DataverseServiceBean dataverseService; + @EJB + protected SettingsServiceBean settingsService; + @Inject + protected DvObjectServiceBean dvObjectService; + @Inject + SystemConfig systemConfig; + + private ServiceLoader<PidProviderFactory> loader; + private Map<String, PidProviderFactory> pidProviderFactoryMap = new HashMap<>(); + + @PostConstruct + public void init() { + loadProviderFactories(); + loadProviders(); + } + + private void loadProviderFactories() { + /* + * Step 1 - find the PROVIDERS dir and add all jar files there to a class loader + */ + List<URL> jarUrls = new ArrayList<>(); + Optional<String> providerPathSetting = JvmSettings.PIDPROVIDERS_DIRECTORY.lookupOptional(String.class); + if (providerPathSetting.isPresent()) { + Path providersDir = Paths.get(providerPathSetting.get()); + // Get all JAR files from the configured directory + try (DirectoryStream<Path> stream = Files.newDirectoryStream(providersDir, "*.jar")) { + // Using the foreach loop here to enable catching the URI/URL exceptions + for (Path path : stream) { + logger.log(Level.FINE, "Adding {0}", path.toUri().toURL()); + // This is the syntax required to indicate a jar file from which classes should + // be loaded (versus a class file). + jarUrls.add(new URL("jar:" + path.toUri().toURL() + "!/")); + } + } catch (IOException e) { + logger.warning("Problem accessing external Providers: " + e.getLocalizedMessage()); + } + } + URLClassLoader cl = URLClassLoader.newInstance(jarUrls.toArray(new URL[0]), this.getClass().getClassLoader()); + + /* + * Step 2 - load all PidProviderFactories that can be found, using the jars as + * additional sources + */ + loader = ServiceLoader.load(PidProviderFactory.class, cl); + /* + * Step 3 - Fill pidProviderFactoryMap with type as the key, allow external + * factories to replace internal ones for the same type. FWIW: From the logging + * it appears that ServiceLoader returns classes in ~ alphabetical order rather + * than by class loader, so internal classes handling a given providerName may + * be processed before or after external ones. + */ + loader.forEach(providerFactory -> { + String type = providerFactory.getType(); + logger.fine("Loaded PidProviderFactory of type: " + type); + // If no entry for this providerName yet or if it is an external provider + if (!pidProviderFactoryMap.containsKey(type) || providerFactory.getClass().getClassLoader().equals(cl)) { + logger.fine("Adding PidProviderFactory of type: " + type + " to the map"); + pidProviderFactoryMap.put(type, providerFactory); + } + logger.log(Level.FINE, + "Loaded PidProviderFactory of type: " + type + " from " + + providerFactory.getClass().getCanonicalName() + " and classloader: " + + providerFactory.getClass().getClassLoader().getClass().getCanonicalName()); + }); + } + + private void loadProviders() { + Optional<String[]> providers = JvmSettings.PID_PROVIDERS.lookupOptional(String[].class); + if (!providers.isPresent()) { + logger.warning( + "No PidProviders configured via dataverse.pid.providers.
Please consider updating as older PIDProvider configuration mechanisms will be removed in a future version of Dataverse."); + } else { + for (String id : providers.get()) { + // Allows spaces in PID_PROVIDERS setting + id = id.trim(); + Optional<String> type = JvmSettings.PID_PROVIDER_TYPE.lookupOptional(id); + if (!type.isPresent()) { + logger.warning("PidProvider " + id + + " listed in dataverse.pid.providers is not properly configured and will not be used."); + } else { + String typeString = type.get(); + if (pidProviderFactoryMap.containsKey(typeString)) { + PidProvider provider = pidProviderFactoryMap.get(typeString).createPidProvider(id); + provider.setPidProviderServiceBean(this); + PidUtil.addToProviderList(provider); + } + } + } + } + String protocol = settingsService.getValueForKey(SettingsServiceBean.Key.Protocol); + String authority = settingsService.getValueForKey(SettingsServiceBean.Key.Authority); + String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder); + String provider = settingsService.getValueForKey(SettingsServiceBean.Key.DoiProvider); + + if (protocol != null && authority != null && shoulder != null && provider != null) { + logger.warning("Found legacy settings: " + protocol + " " + authority + " " + shoulder + " " + provider + + ". Please consider updating as this PIDProvider configuration mechanism will be removed in a future version of Dataverse."); + if (PidUtil.getPidProvider(protocol, authority, shoulder) != null) { + logger.warning( + "Legacy PID provider settings found - ignored since a provider for the same protocol, authority, shoulder has been registered"); + } else { + PidProvider legacy = null; + // Try to add a legacy provider + String identifierGenerationStyle = settingsService + .getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "random"); + String dataFilePidFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, + "DEPENDENT"); + switch (protocol) { + case "doi": + switch (provider) { + case "EZID": + + String baseUrl = JvmSettings.LEGACY_EZID_API_URL.lookup(); + String username = JvmSettings.LEGACY_EZID_USERNAME.lookup(); + String password = JvmSettings.LEGACY_EZID_PASSWORD.lookup(); + PidUtil.addToProviderList(new EZIdDOIProvider("legacy", "legacy", authority, shoulder, + identifierGenerationStyle, dataFilePidFormat, "", "", baseUrl, username, password)); + + break; + case "DataCite": + String mdsUrl = JvmSettings.LEGACY_DATACITE_MDS_API_URL.lookup(); + String restUrl = JvmSettings.LEGACY_DATACITE_REST_API_URL.lookup(); + // Defaults for testing where no account is set up + String dcUsername = JvmSettings.LEGACY_DATACITE_USERNAME.lookup(); + String dcPassword = JvmSettings.LEGACY_DATACITE_PASSWORD.lookup(); + if (mdsUrl != null && restUrl != null && dcUsername != null && dcPassword != null) { + legacy = new DataCiteDOIProvider("legacy", "legacy", authority, shoulder, + identifierGenerationStyle, dataFilePidFormat, "", "", mdsUrl, restUrl, dcUsername, + dcPassword); + } + break; + case "FAKE": + logger.warning("Adding FAKE provider"); + legacy = new FakeDOIProvider("legacy", "legacy", authority, shoulder, identifierGenerationStyle, + dataFilePidFormat, "", ""); + break; + } + break; + case "hdl": + int index = JvmSettings.LEGACY_HANDLENET_INDEX.lookup(Integer.class); + String path = JvmSettings.LEGACY_HANDLENET_KEY_PATH.lookup(); + String passphrase = JvmSettings.LEGACY_HANDLENET_KEY_PASSPHRASE.lookup(); + boolean independentHandleService = settingsService +
.isTrueForKey(SettingsServiceBean.Key.IndependentHandleService, false); + String handleAuthHandle = settingsService.getValueForKey(SettingsServiceBean.Key.HandleAuthHandle); + + legacy = new HandlePidProvider("legacy", "legacy", authority, shoulder, identifierGenerationStyle, + dataFilePidFormat, "", "", index, independentHandleService, handleAuthHandle, path, + passphrase); + break; + case "perma": + String baseUrl = JvmSettings.LEGACY_PERMALINK_BASEURL.lookup(); + legacy = new PermaLinkPidProvider("legacy", "legacy", authority, shoulder, + identifierGenerationStyle, dataFilePidFormat, "", "", baseUrl, + PermaLinkPidProvider.SEPARATOR); + } + if (legacy != null) { + legacy.setPidProviderServiceBean(this); + PidUtil.addToProviderList(legacy); + } + } + logger.info("Have " + PidUtil.getManagedProviderIds().size() + " managed PID providers"); + } + PidUtil.addAllToUnmanagedProviderList(Arrays.asList(new UnmanagedDOIProvider(), + new UnmanagedHandlePidProvider(), new UnmanagedPermaLinkPidProvider())); + } + + public String getProducer() { + return dataverseService.getRootDataverseName(); + } + + public boolean isGlobalIdLocallyUnique(GlobalId globalId) { + return dvObjectService.isGlobalIdLocallyUnique(globalId); + } + + String generateNewIdentifierByStoredProcedure() { + return dvObjectService.generateNewIdentifierByStoredProcedure(); + } + + public PidProvider getDefaultPidGenerator() { + Optional pidProviderDefaultId = JvmSettings.PID_DEFAULT_PROVIDER.lookupOptional(String.class); + if (pidProviderDefaultId.isPresent()) { + return PidUtil.getPidProvider(pidProviderDefaultId.get()); + } else { + String nonNullDefaultIfKeyNotFound = ""; + String protocol = settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, + nonNullDefaultIfKeyNotFound); + String authority = settingsService.getValueForKey(SettingsServiceBean.Key.Authority, + nonNullDefaultIfKeyNotFound); + String shoulder = settingsService.getValueForKey(SettingsServiceBean.Key.Shoulder, + nonNullDefaultIfKeyNotFound); + + return PidUtil.getPidProvider(protocol, authority, shoulder); + } + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidUtil.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidUtil.java index 78305648f67..279f18dcd0e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidUtil.java @@ -1,9 +1,8 @@ package edu.harvard.iq.dataverse.pidproviders; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; -import edu.harvard.iq.dataverse.HandlenetServiceBean; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.IOException; import java.io.InputStream; @@ -14,6 +13,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.logging.Logger; import jakarta.json.Json; @@ -113,24 +113,22 @@ public static JsonObjectBuilder queryDoi(GlobalId globalId, String baseUrl, Stri * @return DOI in the form 10.7910/DVN/TJCLKP (no "doi:") */ private static String acceptOnlyDoi(GlobalId globalId) { - if (!DOIServiceBean.DOI_PROTOCOL.equals(globalId.getProtocol())) { + if (!AbstractDOIProvider.DOI_PROTOCOL.equals(globalId.getProtocol())) { throw new 
IllegalArgumentException(BundleUtil.getStringFromBundle("pids.datacite.errors.DoiOnly")); } return globalId.getAuthority() + "/" + globalId.getIdentifier(); } - static Map providerMap = new HashMap(); - static Map unmanagedProviderMap = new HashMap(); + static Map providerMap = new HashMap(); + static Map unmanagedProviderMap = new HashMap(); - public static void addAllToProviderList(List list) { - for (GlobalIdServiceBean pidProvider : list) { - providerMap.put(pidProvider.getProviderInformation().get(0), pidProvider); - } + public static void addToProviderList(PidProvider pidProvider) { + providerMap.put(pidProvider.getId(), pidProvider); } - public static void addAllToUnmanagedProviderList(List list) { - for (GlobalIdServiceBean pidProvider : list) { - unmanagedProviderMap.put(pidProvider.getProviderInformation().get(0), pidProvider); + public static void addAllToUnmanagedProviderList(List list) { + for (PidProvider pidProvider : list) { + unmanagedProviderMap.put(pidProvider.getId(), pidProvider); } } @@ -141,7 +139,7 @@ public static void addAllToUnmanagedProviderList(List list) */ public static GlobalId parseAsGlobalID(String identifier) { logger.fine("In parseAsGlobalId: " + providerMap.size()); - for (GlobalIdServiceBean pidProvider : providerMap.values()) { + for (PidProvider pidProvider : providerMap.values()) { logger.fine(" Checking " + String.join(",", pidProvider.getProviderInformation())); GlobalId globalId = pidProvider.parsePersistentId(identifier); if (globalId != null) { @@ -149,7 +147,7 @@ public static GlobalId parseAsGlobalID(String identifier) { } } // If no providers can managed this PID, at least allow it to be recognized - for (GlobalIdServiceBean pidProvider : unmanagedProviderMap.values()) { + for (PidProvider pidProvider : unmanagedProviderMap.values()) { logger.fine(" Checking " + String.join(",", pidProvider.getProviderInformation())); GlobalId globalId = pidProvider.parsePersistentId(identifier); if (globalId != null) { @@ -167,14 +165,14 @@ public static GlobalId parseAsGlobalID(String identifier) { public static GlobalId parseAsGlobalID(String protocol, String authority, String identifier) { logger.fine("Looking for " + protocol + " " + authority + " " + identifier); logger.fine("In parseAsGlobalId: " + providerMap.size()); - for (GlobalIdServiceBean pidProvider : providerMap.values()) { + for (PidProvider pidProvider : providerMap.values()) { logger.fine(" Checking " + String.join(",", pidProvider.getProviderInformation())); GlobalId globalId = pidProvider.parsePersistentId(protocol, authority, identifier); if (globalId != null) { return globalId; } } - for (GlobalIdServiceBean pidProvider : unmanagedProviderMap.values()) { + for (PidProvider pidProvider : unmanagedProviderMap.values()) { logger.fine(" Checking " + String.join(",", pidProvider.getProviderInformation())); GlobalId globalId = pidProvider.parsePersistentId(protocol, authority, identifier); if (globalId != null) { @@ -191,28 +189,96 @@ public static GlobalId parseAsGlobalID(String protocol, String authority, String * This method should be deprecated/removed when further refactoring to support * multiple PID providers is done. At that point, when the providers aren't * beans, this code can be moved into other classes that go in the providerMap. - * If this method is not kept in sync with the DOIServiceBean and - * HandlenetServiceBean implementations, the tests using it won't be valid tests - * of the production code. 
+ * If this method is not kept in sync with the AbstractDOIProvider and HandlePidProvider + * implementations, the tests using it won't be valid tests of the production + * code. */ private static GlobalId parseUnmanagedDoiOrHandle(String protocol, String authority, String identifier) { // Default recognition - could be moved to new classes in the future. - if (!GlobalIdServiceBean.isValidGlobalId(protocol, authority, identifier)) { + if (!PidProvider.isValidGlobalId(protocol, authority, identifier)) { return null; } String urlPrefix = null; switch (protocol) { - case DOIServiceBean.DOI_PROTOCOL: - if (!GlobalIdServiceBean.checkDOIAuthority(authority)) { + case AbstractDOIProvider.DOI_PROTOCOL: + if (!PidProvider.checkDOIAuthority(authority)) { return null; } - urlPrefix = DOIServiceBean.DOI_RESOLVER_URL; + urlPrefix = AbstractDOIProvider.DOI_RESOLVER_URL; break; - case HandlenetServiceBean.HDL_PROTOCOL: - urlPrefix = HandlenetServiceBean.HDL_RESOLVER_URL; + case HandlePidProvider.HDL_PROTOCOL: + urlPrefix = HandlePidProvider.HDL_RESOLVER_URL; break; } return new GlobalId(protocol, authority, identifier, "/", urlPrefix, null); } + + /** + * Get a PidProvider by name. GlobalIds have a getProviderName() method so this + * method is often used as + * getPidProvider(dvObject.getGlobalId().getProviderName()) (which will fail if + * the GlobalId is null; use PidProviderFactoryBean.getPidProvider(DvObject) if + * you aren't sure). + * + */ + + public static PidProvider getPidProvider(String name) { + for (PidProvider pidProvider : providerMap.values()) { + if (name.equals(pidProvider.getId())) { + return pidProvider; + } + } + for (PidProvider pidProvider : unmanagedProviderMap.values()) { + if (name.equals(pidProvider.getId())) { + return pidProvider; + } + } + return null; + } + + + + /** + * Method to clear all managed/unmanaged PidProviders. Only for testing as these + * lists are only loaded once by the @Singleton PidProviderFactoryBean in Dataverse. + */ + public static void clearPidProviders() { + providerMap.clear(); + unmanagedProviderMap.clear(); + } + + /** + * Get a PidProvider by protocol/authority/shoulder.
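+ * Managed providers must match on all of protocol, authority, shoulder, and + * separator; unmanaged providers fall back to matching on protocol alone.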
+ */ + public static PidProvider getPidProvider(String protocol, String authority, String shoulder) { + return getPidProvider(protocol, authority, shoulder, AbstractPidProvider.SEPARATOR); + } + + public static PidProvider getPidProvider(String protocol, String authority, String shoulder, String separator) { + for (PidProvider pidProvider : providerMap.values()) { + if (protocol.equals(pidProvider.getProtocol()) && authority.equals(pidProvider.getAuthority()) + && shoulder.equals(pidProvider.getShoulder()) && separator.equals(pidProvider.getSeparator())) { + return pidProvider; + } + } + for (PidProvider pidProvider : unmanagedProviderMap.values()) { + if (protocol.equals(pidProvider.getProtocol())) { + return pidProvider; + } + } + return null; + } + + public static Set<String> getManagedProviderIds() { + return providerMap.keySet(); + } + + public static JsonObject getProviders() { + JsonObjectBuilder builder = Json.createObjectBuilder(); + for (PidProvider pidProvider : providerMap.values()) { + builder.add(pidProvider.getId(), pidProvider.getProviderSpecification()); + } + return builder.build(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java new file mode 100644 index 00000000000..43e34e74c59 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java @@ -0,0 +1,123 @@ +package edu.harvard.iq.dataverse.pidproviders.doi; + +import java.util.Arrays; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; + + +public abstract class AbstractDOIProvider extends AbstractPidProvider { + + private static final Logger logger = Logger.getLogger(AbstractDOIProvider.class.getCanonicalName()); + + public static final String DOI_PROTOCOL = "doi"; + public static final String DOI_RESOLVER_URL = "https://doi.org/"; + public static final String HTTP_DOI_RESOLVER_URL = "http://doi.org/"; + public static final String DXDOI_RESOLVER_URL = "https://dx.doi.org/"; + public static final String HTTP_DXDOI_RESOLVER_URL = "http://dx.doi.org/"; + + public AbstractDOIProvider(String id, String label, String providerAuthority, String providerShoulder, String identifierGenerationStyle, String datafilePidFormat, String managedList, String excludedList) { + super(id, label, DOI_PROTOCOL, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, managedList, excludedList); + } + + // For the Unmanaged provider + public AbstractDOIProvider(String name, String label) { + super(name, label, DOI_PROTOCOL); + } + + @Override + public GlobalId parsePersistentId(String pidString) { + if (pidString.startsWith(DOI_RESOLVER_URL)) { + pidString = pidString.replace(DOI_RESOLVER_URL, + (DOI_PROTOCOL + ":")); + } else if (pidString.startsWith(HTTP_DOI_RESOLVER_URL)) { + pidString = pidString.replace(HTTP_DOI_RESOLVER_URL, + (DOI_PROTOCOL + ":")); + } else if (pidString.startsWith(DXDOI_RESOLVER_URL)) { + pidString = pidString.replace(DXDOI_RESOLVER_URL, + (DOI_PROTOCOL + ":")); + } + return super.parsePersistentId(pidString); + } + + @Override + public GlobalId
parsePersistentId(String protocol, String identifierString) { + + if (!DOI_PROTOCOL.equals(protocol)) { + return null; + } + GlobalId globalId = super.parsePersistentId(protocol, identifierString); + if (globalId!=null && !PidProvider.checkDOIAuthority(globalId.getAuthority())) { + return null; + } + return globalId; + } + + @Override + public GlobalId parsePersistentId(String protocol, String authority, String identifier) { + + if (!DOI_PROTOCOL.equals(protocol)) { + return null; + } + return super.parsePersistentId(protocol, authority, identifier); + } + + public String getUrlPrefix() { + return DOI_RESOLVER_URL; + } + + protected String getProviderKeyName() { + return null; + } + + public String getProtocol() { + return DOI_PROTOCOL; + } + + public String getMetadataFromDvObject(String identifier, Map metadata, DvObject dvObject) { + + Dataset dataset = null; + + if (dvObject instanceof Dataset) { + dataset = (Dataset) dvObject; + } else { + dataset = (Dataset) dvObject.getOwner(); + } + + XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); + metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); + if (dvObject.isInstanceofDataset()) { + metadataTemplate.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); + } + if (dvObject.isInstanceofDataFile()) { + DataFile df = (DataFile) dvObject; + String fileDescription = df.getDescription(); + metadataTemplate.setDescription(fileDescription == null ? "" : fileDescription); + } + + metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); + metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); + metadataTemplate.setTitle(dvObject.getCurrentName()); + String producerString = pidProviderService.getProducer(); + if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { + producerString = UNAVAILABLE; + } + metadataTemplate.setPublisher(producerString); + metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + + String xmlMetadata = metadataTemplate.generateXML(dvObject); + logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); + return xmlMetadata; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedDOIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/UnmanagedDOIProvider.java similarity index 73% rename from src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedDOIServiceBean.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/UnmanagedDOIProvider.java index f7e9372cc9b..d4e674f8396 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedDOIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/UnmanagedDOIProvider.java @@ -1,17 +1,11 @@ -package edu.harvard.iq.dataverse.pidproviders; +package edu.harvard.iq.dataverse.pidproviders.doi; import java.io.IOException; import java.util.List; import java.util.Map; -import java.util.logging.Logger; - -import jakarta.annotation.PostConstruct; -import jakarta.ejb.Stateless; - import org.apache.commons.httpclient.HttpException; import org.apache.commons.lang3.NotImplementedException; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; @@ -20,15 
+14,13 @@ * */ -@Stateless -public class UnmanagedDOIServiceBean extends DOIServiceBean { +public class UnmanagedDOIProvider extends AbstractDOIProvider { - private static final Logger logger = Logger.getLogger(UnmanagedDOIServiceBean.class.getCanonicalName()); + public static final String ID = "UnmanagedDOIProvider"; - @PostConstruct - private void init() { - // Always on - configured = true; + public UnmanagedDOIProvider() { + // Also using ID as label + super(ID, ID); } @Override @@ -73,11 +65,15 @@ public boolean publicizeIdentifier(DvObject dvObject) { @Override public List getProviderInformation() { - return List.of("UnmanagedDOIProvider", ""); + return List.of(getId(), ""); } + @Override + public String getProviderType() { + return "unmanagedDOI"; + } // PID recognition - // Done by DOIServiceBean + // Done by AbstractDOIProvider } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java new file mode 100644 index 00000000000..30e4dfd79cc --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -0,0 +1,314 @@ +package edu.harvard.iq.dataverse.pidproviders.doi; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetAuthor; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; + +public class XmlMetadataTemplate { + + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.DataCiteMetadataTemplate"); + private static String template; + + static { + try (InputStream in = XmlMetadataTemplate.class.getResourceAsStream("datacite_metadata_template.xml")) { + template = new String(in.readAllBytes(), StandardCharsets.UTF_8); + } catch (Exception e) { + logger.log(Level.SEVERE, "datacite metadata template load error"); + logger.log(Level.SEVERE, "String " + e.toString()); + logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage()); + logger.log(Level.SEVERE, "cause " + e.getCause()); + logger.log(Level.SEVERE, "message " + e.getMessage()); + } + } + + private String xmlMetadata; + private String identifier; + private List datafileIdentifiers; + private List creators; + private String title; + private String publisher; + private String publisherYear; + private List authors; + private String description; + private List contacts; + private List producers; + + public List getProducers() { + return producers; + } + + public void setProducers(List producers) { + this.producers = producers; + } + + public List getContacts() { + return contacts; + } + + public void setContacts(List contacts) { + this.contacts = contacts; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public List getAuthors() { + return authors; + } + + public void setAuthors(List authors) { + this.authors = authors; + } + + public XmlMetadataTemplate() { + } + + public List getDatafileIdentifiers() { + return datafileIdentifiers; + } + + public void setDatafileIdentifiers(List 
datafileIdentifiers) { + this.datafileIdentifiers = datafileIdentifiers; + } + + public XmlMetadataTemplate(String xmlMetaData) { + this.xmlMetadata = xmlMetaData; + Document doc = Jsoup.parseBodyFragment(xmlMetaData); + Elements identifierElements = doc.select("identifier"); + if (identifierElements.size() > 0) { + identifier = identifierElements.get(0).html(); + } + Elements creatorElements = doc.select("creatorName"); + creators = new ArrayList<>(); + for (Element creatorElement : creatorElements) { + creators.add(creatorElement.html()); + } + Elements titleElements = doc.select("title"); + if (titleElements.size() > 0) { + title = titleElements.get(0).html(); + } + Elements publisherElements = doc.select("publisher"); + if (publisherElements.size() > 0) { + publisher = publisherElements.get(0).html(); + } + Elements publisherYearElements = doc.select("publicationYear"); + if (publisherYearElements.size() > 0) { + publisherYear = publisherYearElements.get(0).html(); + } + } + + public String generateXML(DvObject dvObject) { + // Can't use "UNKNOWN" here because DataCite will respond with "[facet + // 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" + String publisherYearFinal = "9999"; + // FIXME: Investigate why this.publisherYear is sometimes null now that pull + // request #4606 has been merged. + if (this.publisherYear != null) { + // Added to prevent a NullPointerException when trying to destroy datasets when + // using DataCite rather than EZID. + publisherYearFinal = this.publisherYear; + } + xmlMetadata = template.replace("${identifier}", getIdentifier().trim()).replace("${title}", this.title) + .replace("${publisher}", this.publisher).replace("${publisherYear}", publisherYearFinal) + .replace("${description}", this.description); + + StringBuilder creatorsElement = new StringBuilder(); + if (authors != null && !authors.isEmpty()) { + for (DatasetAuthor author : authors) { + creatorsElement.append(""); + creatorsElement.append(author.getName().getDisplayValue()); + creatorsElement.append(""); + + if (author.getIdType() != null && author.getIdValue() != null && !author.getIdType().isEmpty() + && !author.getIdValue().isEmpty() && author.getAffiliation() != null + && !author.getAffiliation().getDisplayValue().isEmpty()) { + + if (author.getIdType().equals("ORCID")) { + creatorsElement.append( + "" + + author.getIdValue() + ""); + } + if (author.getIdType().equals("ISNI")) { + creatorsElement.append( + "" + + author.getIdValue() + ""); + } + if (author.getIdType().equals("LCNA")) { + creatorsElement.append( + "" + + author.getIdValue() + ""); + } + } + if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { + creatorsElement + .append("" + author.getAffiliation().getDisplayValue() + ""); + } + creatorsElement.append(""); + } + + } else { + creatorsElement.append("").append(AbstractPidProvider.UNAVAILABLE) + .append(""); + } + + xmlMetadata = xmlMetadata.replace("${creators}", creatorsElement.toString()); + + StringBuilder contributorsElement = new StringBuilder(); + if (this.getContacts() != null) { + for (String[] contact : this.getContacts()) { + if (!contact[0].isEmpty()) { + contributorsElement.append("" + + contact[0] + ""); + if (!contact[1].isEmpty()) { + contributorsElement.append("" + contact[1] + ""); + } + contributorsElement.append(""); + } + } + } + + if (this.getProducers() != null) { + for (String[] producer : this.getProducers()) { + contributorsElement.append("" + producer[0] + + ""); + if 
(!producer[1].isEmpty()) { + contributorsElement.append("" + producer[1] + ""); + } + contributorsElement.append(""); + } + } + + String relIdentifiers = generateRelatedIdentifiers(dvObject); + + xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); + + xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); + return xmlMetadata; + } + + private String generateRelatedIdentifiers(DvObject dvObject) { + + StringBuilder sb = new StringBuilder(); + if (dvObject.isInstanceofDataset()) { + Dataset dataset = (Dataset) dvObject; + if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + + datafileIdentifiers = new ArrayList<>(); + for (DataFile dataFile : dataset.getFiles()) { + if (dataFile.getGlobalId() != null) { + if (sb.toString().isEmpty()) { + sb.append(""); + } + sb.append("" + + dataFile.getGlobalId() + ""); + } + } + + if (!sb.toString().isEmpty()) { + sb.append(""); + } + } + } else if (dvObject.isInstanceofDataFile()) { + DataFile df = (DataFile) dvObject; + sb.append(""); + sb.append("" + + df.getOwner().getGlobalId() + ""); + sb.append(""); + } + return sb.toString(); + } + + public void generateFileIdentifiers(DvObject dvObject) { + + if (dvObject.isInstanceofDataset()) { + Dataset dataset = (Dataset) dvObject; + + if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + + datafileIdentifiers = new ArrayList<>(); + for (DataFile dataFile : dataset.getFiles()) { + datafileIdentifiers.add(dataFile.getIdentifier()); + int x = xmlMetadata.indexOf("") - 1; + xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", dataFile.getIdentifier()); + xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" + + template.substring(x, template.length() - 1); + + } + + } else { + xmlMetadata = xmlMetadata.replace( + "${relatedIdentifier}", + ""); + } + } + } + + public static String getTemplate() { + return template; + } + + public static void setTemplate(String template) { + XmlMetadataTemplate.template = template; + } + + public String getIdentifier() { + return identifier; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + public List getCreators() { + return creators; + } + + public void setCreators(List creators) { + this.creators = creators; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getPublisher() { + return publisher; + } + + public void setPublisher(String publisher) { + this.publisher = publisher; + } + + public String getPublisherYear() { + return publisherYear; + } + + public void setPublisherYear(String publisherYear) { + this.publisherYear = publisherYear; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java new file mode 100644 index 00000000000..cda70cbc506 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java @@ -0,0 +1,257 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
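+ * + * This class is a thin wrapper around the DataCite MDS API: it uses + * DataCiteRESTfullClient to post metadata and target URLs on behalf of + * DataCiteDOIProvider (see the methods below). 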
+ */ +package edu.harvard.iq.dataverse.pidproviders.doi.datacite; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.commons.text.StringEscapeUtils; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; + +import org.xmlunit.builder.DiffBuilder; +import org.xmlunit.builder.Input; +import org.xmlunit.builder.Input.Builder; +import org.xmlunit.diff.Diff; +import org.xmlunit.diff.Difference; + +/** + * + * @author luopc + */ +public class DOIDataCiteRegisterService { + + private static final Logger logger = Logger.getLogger(DOIDataCiteRegisterService.class.getCanonicalName()); + + + // A singleton, since it and the httpClient in it can be reused. + private DataCiteRESTfullClient client=null; + + public DOIDataCiteRegisterService(String url, String username, String password) { + client = new DataCiteRESTfullClient(url, username, password); + } + + /** + * This "reserveIdentifier" method is heavily based on the + * "registerIdentifier" method below, but this one doesn't register a URL, + * which is what causes the "state" of a DOI to transition from + * "draft" to "findable". Here are some DataCite docs on the matter: + * + * "DOIs can exist in three states: draft, registered, and findable. DOIs + * are in the draft state when metadata have been registered, and will + * transition to the findable state when registering a URL." 
-- + * https://support.datacite.org/docs/mds-api-guide#doi-states + */ + public String reserveIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { + String retString = ""; + String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); + + retString = client.postMetadata(xmlMetadata); + + return retString; + } + + public String registerIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { + String retString = ""; + String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); + String target = metadata.get("_target"); + + retString = client.postMetadata(xmlMetadata); + client.postUrl(identifier.substring(identifier.indexOf(":") + 1), target); + + return retString; + } + + + public String reRegisterIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { + String retString = ""; + String numericIdentifier = identifier.substring(identifier.indexOf(":") + 1); + String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); + String target = metadata.get("_target"); + String currentMetadata = client.getMetadata(numericIdentifier); + Diff myDiff = DiffBuilder.compare(xmlMetadata) + .withTest(currentMetadata).ignoreWhitespace().checkForSimilar() + .build(); + + if (myDiff.hasDifferences()) { + for(Difference d : myDiff.getDifferences()) { + + logger.fine(d.toString()); + } + retString = "metadata:\\r" + client.postMetadata(xmlMetadata) + "\\r"; + } + if (!target.equals(client.getUrl(numericIdentifier))) { + logger.info("Updating target URL to " + target); + client.postUrl(numericIdentifier, target); + retString = retString + "url:\\r" + target; + + } + + return retString; + } + + + public String deactivateIdentifier(String identifier, Map metadata, DvObject dvObject) throws IOException { + String retString = ""; + + String metadataString = getMetadataForDeactivateIdentifier(identifier, metadata, dvObject); + retString = client.postMetadata(metadataString); + retString = client.inactiveDataset(identifier.substring(identifier.indexOf(":") + 1)); + + return retString; + } + + public static String getMetadataFromDvObject(String identifier, Map metadata, DvObject dvObject) { + + Dataset dataset = null; + + if (dvObject instanceof Dataset) { + dataset = (Dataset) dvObject; + } else { + dataset = (Dataset) dvObject.getOwner(); + } + + XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); + metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); + if (dvObject.isInstanceofDataset()) { + // While getDescriptionPlainText strips < and > from HTML, it leaves '&' (at least), so we need to xml escape as well + String description = StringEscapeUtils.escapeXml10(dataset.getLatestVersion().getDescriptionPlainText()); + if (description.isEmpty() || description.equals(DatasetField.NA_VALUE)) { + description = AbstractPidProvider.UNAVAILABLE; + } + metadataTemplate.setDescription(description); + } + if (dvObject.isInstanceofDataFile()) { + DataFile df = (DataFile) dvObject; + //Note: File metadata is not escaped like dataset metadata is, so adding an xml escape here. + //This could/should be removed if the datafile methods add escaping + String fileDescription = StringEscapeUtils.escapeXml10(df.getDescription()); + metadataTemplate.setDescription(fileDescription == null ? 
AbstractPidProvider.UNAVAILABLE : fileDescription); + } + + metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); + metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); + String title = dvObject.getCurrentName(); + if(dvObject.isInstanceofDataFile()) { + //Note file title is not currently escaped the way the dataset title is, so adding it here. + title = StringEscapeUtils.escapeXml10(title); + } + + if (title.isEmpty() || title.equals(DatasetField.NA_VALUE)) { + title = AbstractPidProvider.UNAVAILABLE; + } + + metadataTemplate.setTitle(title); + String producerString = BrandingUtil.getRootDataverseCollectionName(); + if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { + producerString = AbstractPidProvider.UNAVAILABLE; + } + metadataTemplate.setPublisher(producerString); + metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + + String xmlMetadata = metadataTemplate.generateXML(dvObject); + logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); + return xmlMetadata; + } + + public static String getMetadataForDeactivateIdentifier(String identifier, Map metadata, DvObject dvObject) { + + XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); + metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + + metadataTemplate.setDescription(AbstractPidProvider.UNAVAILABLE); + + String title =metadata.get("datacite.title"); + + System.out.print("Map metadata title: "+ metadata.get("datacite.title")); + + metadataTemplate.setAuthors(null); + + metadataTemplate.setTitle(title); + String producerString = AbstractPidProvider.UNAVAILABLE; + + metadataTemplate.setPublisher(producerString); + metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + + String xmlMetadata = metadataTemplate.generateXML(dvObject); + logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); + return xmlMetadata; + } + + public String modifyIdentifier(String identifier, Map metadata, DvObject dvObject) + throws IOException { + + String xmlMetadata = getMetadataFromDvObject(identifier, metadata, dvObject); + + logger.fine("XML to send to DataCite: " + xmlMetadata); + + String status = metadata.get("_status").trim(); + String target = metadata.get("_target"); + String retString = ""; + switch (status) { + case DataCiteDOIProvider.DRAFT: + // draft DOIs aren't currently being updated after every edit - ToDo - should + // this be changed or made optional? 
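+ // As implemented below (a reading of the code, not DataCite-official + // semantics): DRAFT leaves the reserved metadata untouched, FINDABLE + // re-posts the metadata and target URL, and REGISTERED marks the DOI + // inactive via the MDS API. 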
+ retString = "success to reserved " + identifier; + break; + case DataCiteDOIProvider.FINDABLE: + try { + retString = client.postMetadata(xmlMetadata); + client.postUrl(identifier.substring(identifier.indexOf(":") + 1), target); + } catch (UnsupportedEncodingException ex) { + logger.log(Level.SEVERE, null, ex); + } catch (RuntimeException rte) { + logger.log(Level.SEVERE, "Error creating DOI at DataCite: {0}", rte.getMessage()); + logger.log(Level.SEVERE, "Exception", rte); + } + break; + case DataCiteDOIProvider.REGISTERED: + retString = client.inactiveDataset(identifier.substring(identifier.indexOf(":") + 1)); + break; + } + return retString; + } + + public boolean testDOIExists(String identifier) { + boolean doiExists; + try { + doiExists = client.testDOIExists(identifier.substring(identifier.indexOf(":") + 1)); + } catch (Exception e) { + logger.log(Level.INFO, identifier, e); + return false; + } + return doiExists; + } + + Map getMetadata(String identifier) throws IOException { + Map metadata = new HashMap<>(); + try { + String xmlMetadata = client.getMetadata(identifier.substring(identifier.indexOf(":") + 1)); + XmlMetadataTemplate template = new XmlMetadataTemplate(xmlMetadata); + metadata.put("datacite.creator", String.join("; ", template.getCreators())); + metadata.put("datacite.title", template.getTitle()); + metadata.put("datacite.publisher", template.getPublisher()); + metadata.put("datacite.publicationyear", template.getPublisherYear()); + } catch (RuntimeException e) { + logger.log(Level.INFO, identifier, e); + } + return metadata; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java new file mode 100644 index 00000000000..cd765933796 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java @@ -0,0 +1,343 @@ +package edu.harvard.iq.dataverse.pidproviders.doi.datacite; + +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.Base64; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import org.apache.commons.httpclient.HttpException; +import org.apache.commons.httpclient.HttpStatus; + +/** + * + * @author luopc + */ +public class DataCiteDOIProvider extends AbstractDOIProvider { + + private static final Logger logger = Logger.getLogger(DataCiteDOIProvider.class.getCanonicalName()); + + static final String FINDABLE = "findable"; //public - published dataset versions + static final String DRAFT = "draft"; //reserved but not findable yet - draft/unpublished datasets + static final String REGISTERED = "registered"; //was findable once, not anymore - deaccessioned datasets + static final String NONE = "none"; //no record - draft/unpublished datasets where the initial request to reserve has failed + + public static final String TYPE = "datacite"; + + + private String mdsUrl; + private String apiUrl; + private String username; + private String password; + + private DOIDataCiteRegisterService 
doiDataCiteRegisterService; + + public DataCiteDOIProvider(String id, String label, String providerAuthority, String providerShoulder, + String identifierGenerationStyle, String datafilePidFormat, String managedList, String excludedList, + String mdsUrl, String apiUrl, String username, String password) { + super(id, label, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, managedList, + excludedList); + this.mdsUrl = mdsUrl; + this.apiUrl = apiUrl; + this.username = username; + this.password = password; + doiDataCiteRegisterService = new DOIDataCiteRegisterService(mdsUrl, username, password); + } + + @Override + public boolean registerWhenPublished() { + return false; + } + + @Override + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) { + logger.log(Level.FINE, "alreadyRegistered"); + if (pid == null || pid.asString().isEmpty()) { + logger.fine("No identifier sent."); + return false; + } + boolean alreadyRegistered; + String identifier = pid.asString(); + try { + alreadyRegistered = doiDataCiteRegisterService.testDOIExists(identifier); + } catch (Exception e) { + logger.log(Level.WARNING, "alreadyRegistered failed"); + return false; + } + return alreadyRegistered; + } + + @Override + public String createIdentifier(DvObject dvObject) throws Exception { + logger.log(Level.FINE, "createIdentifier"); + if (dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty()) { + dvObject = generatePid(dvObject); + } + String identifier = getIdentifier(dvObject); + Map metadata = getMetadataForCreateIndicator(dvObject); + metadata.put("_status", DRAFT); + try { + String retString = doiDataCiteRegisterService.reserveIdentifier(identifier, metadata, dvObject); + logger.log(Level.FINE, "create DOI identifier retString : " + retString); + return retString; + } catch (Exception e) { + logger.log(Level.WARNING, "Identifier not created: create failed", e); + throw e; + } + } + + @Override + public Map getIdentifierMetadata(DvObject dvObject) { + logger.log(Level.FINE, "getIdentifierMetadata"); + String identifier = getIdentifier(dvObject); + Map metadata = new HashMap<>(); + try { + metadata = doiDataCiteRegisterService.getMetadata(identifier); + metadata.put("_status", getPidStatus(dvObject)); + } catch (Exception e) { + logger.log(Level.WARNING, "getIdentifierMetadata failed", e); + } + return metadata; + } + + /** + * Modifies the DOI metadata for a Dataset + * + * @param dvObject the dvObject whose metadata needs to be modified + * @return the Dataset identifier, or null if the modification failed + * @throws java.lang.Exception + */ + @Override + public String modifyIdentifierTargetURL(DvObject dvObject) throws Exception { + logger.log(Level.FINE, "modifyIdentifier"); + String identifier = getIdentifier(dvObject); + try { + Map metadata = getIdentifierMetadata(dvObject); + doiDataCiteRegisterService.modifyIdentifier(identifier, metadata, dvObject); + } catch (Exception e) { + logger.log(Level.WARNING, "modifyMetadata failed", e); + throw e; + } + return identifier; + } + + /* + * Deletes a DOI if it is in DRAFT/DRAFT state or removes metadata and + * changes it from PUBLIC/FINDABLE to REGISTERED. 
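+ * (In terms of the state constants above: a DRAFT DOI is deleted outright, a + * FINDABLE one is deactivated to REGISTERED with its metadata replaced and its + * target URL reset, and REGISTERED/NONE need no action.) 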
+ */ + @Override + public void deleteIdentifier(DvObject dvObject) throws IOException, HttpException { + logger.log(Level.FINE, "deleteIdentifier"); + String identifier = getIdentifier(dvObject); + String idStatus = getPidStatus(dvObject); + switch (idStatus) { + case DRAFT: + logger.log(Level.FINE, "Delete status is reserved.."); + deleteDraftIdentifier(dvObject); + break; + case FINDABLE: + // if public then it has been released; set to REGISTERED/unavailable and reset + // the target url + Map metadata = addDOIMetadataForDestroyedDataset(dvObject); + metadata.put("_status", "registered"); + metadata.put("_target", getTargetUrl(dvObject)); + doiDataCiteRegisterService.deactivateIdentifier(identifier, metadata, dvObject); + break; + + case REGISTERED: + case NONE: + // Nothing to do + } + } + + /** + * Deletes DOI from the DataCite side, if possible. Only "draft" DOIs can be + * deleted. + */ + private void deleteDraftIdentifier(DvObject dvObject) throws IOException { + + GlobalId doi = dvObject.getGlobalId(); + /** + * Deletes the DOI from DataCite if it can. Returns 204 if the PID was deleted (only + * possible for "draft" DOIs), 405 (method not allowed) if the DOI wasn't + * deleted (because it's in "findable" state, for example), 404 if the DOI wasn't + * found, and possibly other status codes such as 500 if DataCite is down. + */ + + URL url = new URL(getApiUrl() + "/dois/" + doi.getAuthority() + "/" + doi.getIdentifier()); + HttpURLConnection connection = null; + connection = (HttpURLConnection) url.openConnection(); + connection.setRequestMethod("DELETE"); + String userpass = getUsername() + ":" + getPassword(); + String basicAuth = "Basic " + new String(Base64.getEncoder().encode(userpass.getBytes())); + connection.setRequestProperty("Authorization", basicAuth); + int status = connection.getResponseCode(); + if (status != HttpStatus.SC_NO_CONTENT) { + logger.warning( + "Incorrect Response Status from DataCite: " + status + " : " + connection.getResponseMessage()); + throw new HttpException("Status: " + status); + } + logger.fine("deleteDoi status for " + doi.asString() + ": " + status); + } + + @Override + public boolean publicizeIdentifier(DvObject dvObject) { + logger.log(Level.FINE, "publicizeIdentifier"); + if (dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty()) { + dvObject = generatePid(dvObject); + } + String identifier = getIdentifier(dvObject); + Map metadata = getUpdateMetadata(dvObject); + metadata.put("_status", FINDABLE); + metadata.put("datacite.publicationyear", generateYear(dvObject)); + metadata.put("_target", getTargetUrl(dvObject)); + try { + doiDataCiteRegisterService.registerIdentifier(identifier, metadata, dvObject); + return true; + } catch (Exception e) { + logger.log(Level.WARNING, "modifyMetadata failed: " + e.getMessage(), e); + return false; + } + } + + @Override + public List getProviderInformation() { + return List.of(getId(), "https://status.datacite.org"); + } + + @Override + protected String getProviderKeyName() { + return "DataCite"; + } + + @Override + public String getProviderType() { + return TYPE; + } + + public String getMdsUrl() { + return mdsUrl; + } + + public String getApiUrl() { + return apiUrl; + } + + public String getUsername() { + return username; + } + + public String getPassword() { + return password; + } + + /** + * Method to determine the status of a dvObject's PID. It replaces keeping a + * separate DOIDataCiteRegisterCache. 
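+ * Roughly, as coded below: any released version yields FINDABLE; failing that, + * a deaccessioned version with a registered identifier yields REGISTERED; a + * draft version with a registered identifier yields DRAFT; otherwise NONE. 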
We could also try to get this info from + * DataCite directly, but it appears to not be in the xml metadata return, so it + * would require another/different api call (possible ToDo). + * + * @param dvObject - Dataset or DataFile + * @return PID status - NONE, DRAFT, FINDABLE, or REGISTERED + */ + String getPidStatus(DvObject dvObject) { + String status = NONE; + if (dvObject instanceof Dataset) { + Dataset dataset = (Dataset) dvObject; + // return true, if all published versions were deaccessioned + boolean hasDeaccessionedVersions = false; + for (DatasetVersion testDsv : dataset.getVersions()) { + if (testDsv.isReleased()) { + // With any released version, we're done + return FINDABLE; + } + // Also check for draft version + if (testDsv.isDraft()) { + if (dataset.isIdentifierRegistered()) { + status = DRAFT; + // Keep iterating to see if there's a released version + } + } + if (testDsv.isDeaccessioned()) { + hasDeaccessionedVersions = true; + // Keep iterating to see if there's a released version + } + } + if (hasDeaccessionedVersions) { + if (dataset.isIdentifierRegistered()) { + return REGISTERED; + } + } + return status; + } else if (dvObject instanceof DataFile) { + DataFile df = (DataFile) dvObject; + // return true, if all published versions were deaccessioned + boolean isInDeaccessionedVersions = false; + for (FileMetadata fmd : df.getFileMetadatas()) { + DatasetVersion testDsv = fmd.getDatasetVersion(); + if (testDsv.isReleased()) { + // With any released version, we're done + return FINDABLE; + } + // Also check for draft version + if (testDsv.isDraft()) { + if (df.isIdentifierRegistered()) { + status = DRAFT; + // Keep iterating to see if there's a released/deaccessioned version + } + } + if (testDsv.isDeaccessioned()) { + isInDeaccessionedVersions = true; + // Keep iterating to see if there's a released version + } + } + if (isInDeaccessionedVersions) { + if (df.isIdentifierRegistered()) { + return REGISTERED; + } + } + + } + return status; + } + + + @Override + public boolean updateIdentifier(DvObject dvObject) { + logger.log(Level.FINE,"updateIdentifier"); + if(dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty() ){ + dvObject = generatePid(dvObject); + } + String identifier = getIdentifier(dvObject); + Map metadata = getUpdateMetadata(dvObject); + metadata.put("_status", "public"); + metadata.put("datacite.publicationyear", generateYear(dvObject)); + metadata.put("_target", getTargetUrl(dvObject)); + try { + String updated = doiDataCiteRegisterService.reRegisterIdentifier(identifier, metadata, dvObject); + if(updated.length()!=0) { + logger.info(identifier + " updated: " + updated ); + return true; + } else { + logger.info("No update needed for " + identifier); + return false; //No update needed + } + } catch (Exception e) { + logger.log(Level.WARNING, "updateIdentifier failed: " + e.getMessage(), e); + return false; + } + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteProviderFactory.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteProviderFactory.java new file mode 100644 index 00000000000..99d13b2647c --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteProviderFactory.java @@ -0,0 +1,43 @@ +package edu.harvard.iq.dataverse.pidproviders.doi.datacite; + +import com.google.auto.service.AutoService; + +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactory; +import 
edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; + +@AutoService(PidProviderFactory.class) +public class DataCiteProviderFactory implements PidProviderFactory { + + @Override + public PidProvider createPidProvider(String providerId) { + String providerType = JvmSettings.PID_PROVIDER_TYPE.lookup(providerId); + if (!providerType.equals(DataCiteDOIProvider.TYPE)) { + // Being asked to create a non-DataCite provider + return null; + } + String providerLabel = JvmSettings.PID_PROVIDER_LABEL.lookup(providerId); + String providerAuthority = JvmSettings.PID_PROVIDER_AUTHORITY.lookup(providerId); + String providerShoulder = JvmSettings.PID_PROVIDER_SHOULDER.lookupOptional(providerId).orElse(""); + String identifierGenerationStyle = JvmSettings.PID_PROVIDER_IDENTIFIER_GENERATION_STYLE + .lookupOptional(providerId).orElse("randomString"); + String datafilePidFormat = JvmSettings.PID_PROVIDER_DATAFILE_PID_FORMAT.lookupOptional(providerId) + .orElse(SystemConfig.DataFilePIDFormat.DEPENDENT.toString()); + String managedList = JvmSettings.PID_PROVIDER_MANAGED_LIST.lookupOptional(providerId).orElse(""); + String excludedList = JvmSettings.PID_PROVIDER_EXCLUDED_LIST.lookupOptional(providerId).orElse(""); + + String mdsUrl = JvmSettings.DATACITE_MDS_API_URL.lookupOptional(providerId).orElse("https://mds.test.datacite.org"); + String apiUrl = JvmSettings.DATACITE_REST_API_URL.lookupOptional(providerId).orElse("https://api.test.datacite.org"); + String username = JvmSettings.DATACITE_USERNAME.lookup(providerId); + String password = JvmSettings.DATACITE_PASSWORD.lookup(providerId); + + return new DataCiteDOIProvider(providerId, providerLabel, providerAuthority, providerShoulder, identifierGenerationStyle, + datafilePidFormat, managedList, excludedList, mdsUrl, apiUrl, username, password); + } + + public String getType() { + return DataCiteDOIProvider.TYPE; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCiteRESTfullClient.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java similarity index 92% rename from src/main/java/edu/harvard/iq/dataverse/DataCiteRESTfullClient.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java index 491f19ab36c..d185b0249b9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCiteRESTfullClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java @@ -3,7 +3,7 @@ * To change this template file, choose Tools | Templates * and open the template in the editor. 
*/ -package edu.harvard.iq.dataverse; +package edu.harvard.iq.dataverse.pidproviders.doi.datacite; import java.io.Closeable; @@ -45,21 +45,14 @@ public class DataCiteRESTfullClient implements Closeable { private HttpClientContext context; private String encoding = "utf-8"; - public DataCiteRESTfullClient(String url, String username, String password) throws IOException { + public DataCiteRESTfullClient(String url, String username, String password) { this.url = url; - try { - context = HttpClientContext.create(); - CredentialsProvider credsProvider = new BasicCredentialsProvider(); - credsProvider.setCredentials(new AuthScope(null, -1), - new UsernamePasswordCredentials(username, password)); - context.setCredentialsProvider(credsProvider); - - httpClient = HttpClients.createDefault(); - } catch (Exception ioe) { - close(); - logger.log(Level.SEVERE,"Fail to init Client",ioe); - throw new RuntimeException("Fail to init Client", ioe); - } + context = HttpClientContext.create(); + CredentialsProvider credsProvider = new BasicCredentialsProvider(); + credsProvider.setCredentials(new AuthScope(null, -1), new UsernamePasswordCredentials(username, password)); + context.setCredentialsProvider(credsProvider); + + httpClient = HttpClients.createDefault(); } public void close() { diff --git a/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/ezid/EZIdDOIProvider.java similarity index 89% rename from src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/ezid/EZIdDOIProvider.java index 86b74b72f30..fe8f1ec9c70 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DOIEZIdServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/ezid/EZIdDOIProvider.java @@ -1,5 +1,10 @@ -package edu.harvard.iq.dataverse; +package edu.harvard.iq.dataverse.pidproviders.doi.ezid; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.ucsb.nceas.ezid.EZIDException; import edu.ucsb.nceas.ezid.EZIDService; @@ -13,40 +18,40 @@ * * @author skraffmiller */ -@Stateless -public class DOIEZIdServiceBean extends DOIServiceBean { +public class EZIdDOIProvider extends AbstractDOIProvider { - private static final Logger logger = Logger.getLogger(DOIEZIdServiceBean.class.getCanonicalName()); + + + private static final Logger logger = Logger.getLogger(EZIdDOIProvider.class.getCanonicalName()); EZIDService ezidService; - // This has a sane default in microprofile-config.properties - private final String baseUrl = JvmSettings.EZID_API_URL.lookup(); + public static final String TYPE = "ezid"; + + private String baseUrl; - public DOIEZIdServiceBean() { + + public EZIdDOIProvider(String id, String label, String providerAuthority, String providerShoulder, String identifierGenerationStyle, + String datafilePidFormat, String managedList, String excludedList, String baseUrl, String username, String password) { + super(id, label, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, managedList, excludedList); // Creating the service doesn't do any harm, just initializing some object data here. 
// Makes sure we don't run into NPEs from the other methods, but will obviously fail if the // login below does not work. - this.ezidService = new EZIDService(this.baseUrl); + this.baseUrl = baseUrl; + this.ezidService = new EZIDService(baseUrl); try { - // These have (obviously) no default, but still are optional to make the provider optional - String username = JvmSettings.EZID_USERNAME.lookupOptional().orElse(null); - String password = JvmSettings.EZID_PASSWORD.lookupOptional().orElse(null); - - if (username != null ^ password != null) { - logger.log(Level.WARNING, "You must give both username and password. Will not try to login."); - } - + if (username != null && password != null) { this.ezidService.login(username, password); - this.configured = true; + } else { + logger.log(Level.WARNING, "You must give both username and password. Will not try to login."); } } catch (EZIDException e) { // We only do the warnings here, but the object still needs to be created. // The EJB stateless thing expects this to go through, and it is requested on any // global id parsing. - logger.log(Level.WARNING, "Login failed to {0}", this.baseUrl); + logger.log(Level.WARNING, "Login failed to {0}", baseUrl); logger.log(Level.WARNING, "Exception String: {0}", e.toString()); logger.log(Level.WARNING, "Localized message: {0}", e.getLocalizedMessage()); logger.log(Level.WARNING, "Cause:", e.getCause()); @@ -227,14 +232,14 @@ private boolean updateIdentifierStatus(DvObject dvObject, String statusIn) { @Override public List getProviderInformation(){ - return List.of("EZID", this.baseUrl); + return List.of(getId(), this.baseUrl); } @Override public String createIdentifier(DvObject dvObject) throws Throwable { logger.log(Level.FINE, "createIdentifier"); if(dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty() ){ - dvObject = generateIdentifier(dvObject); + dvObject = generatePid(dvObject); } String identifier = getIdentifier(dvObject); Map metadata = getMetadataForCreateIndicator(dvObject); @@ -271,7 +276,7 @@ public String createIdentifier(DvObject dvObject) throws Throwable { * @return A HashMap with the same values as {@code map} */ private HashMap asHashMap(Map map) { - return (map instanceof HashMap) ? (HashMap)map : new HashMap<>(map); + return (map instanceof HashMap) ? 
(HashMap)map : new HashMap<>(map); } @Override @@ -279,5 +284,9 @@ protected String getProviderKeyName() { return "EZID"; } + @Override + public String getProviderType() { + return TYPE; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/ezid/EZIdProviderFactory.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/ezid/EZIdProviderFactory.java new file mode 100644 index 00000000000..95ad9bdeff0 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/ezid/EZIdProviderFactory.java @@ -0,0 +1,42 @@ +package edu.harvard.iq.dataverse.pidproviders.doi.ezid; + +import com.google.auto.service.AutoService; + +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; + +@AutoService(PidProviderFactory.class) +public class EZIdProviderFactory implements PidProviderFactory { + + @Override + public PidProvider createPidProvider(String providerId) { + String providerType = JvmSettings.PID_PROVIDER_TYPE.lookup(providerId); + if (!providerType.equals(EZIdDOIProvider.TYPE)) { + // Being asked to create a non-EZId provider + return null; + } + String providerLabel = JvmSettings.PID_PROVIDER_LABEL.lookup(providerId); + String providerAuthority = JvmSettings.PID_PROVIDER_AUTHORITY.lookup(providerId); + String providerShoulder = JvmSettings.PID_PROVIDER_SHOULDER.lookupOptional(providerId).orElse(""); + String identifierGenerationStyle = JvmSettings.PID_PROVIDER_IDENTIFIER_GENERATION_STYLE + .lookupOptional(providerId).orElse("randomString"); + String datafilePidFormat = JvmSettings.PID_PROVIDER_DATAFILE_PID_FORMAT.lookupOptional(providerId) + .orElse(SystemConfig.DataFilePIDFormat.DEPENDENT.toString()); + String managedList = JvmSettings.PID_PROVIDER_MANAGED_LIST.lookupOptional(providerId).orElse(""); + String excludedList = JvmSettings.PID_PROVIDER_EXCLUDED_LIST.lookupOptional(providerId).orElse(""); + + String baseUrl = JvmSettings.EZID_API_URL.lookupOptional(providerId).orElse("https://ezid.cdlib.org"); + String username = JvmSettings.EZID_USERNAME.lookup(providerId); + String password = JvmSettings.EZID_PASSWORD.lookup(providerId); + + return new EZIdDOIProvider(providerId, providerLabel, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, + managedList, excludedList, baseUrl, username, password); + } + + public String getType() { + return EZIdDOIProvider.TYPE; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/fake/FakeDOIProvider.java similarity index 59% rename from src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/fake/FakeDOIProvider.java index 3bd9d9dd022..a967fb40620 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/FakePidProviderServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/fake/FakeDOIProvider.java @@ -1,22 +1,22 @@ -package edu.harvard.iq.dataverse.pidproviders; +package edu.harvard.iq.dataverse.pidproviders.doi.fake; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import java.util.HashMap; import java.util.List; import 
java.util.Map; -import java.util.logging.Logger; -import jakarta.ejb.Stateless; +public class FakeDOIProvider extends AbstractDOIProvider { -@Stateless -public class FakePidProviderServiceBean extends DOIServiceBean { + public static final String TYPE = "FAKE"; - private static final Logger logger = Logger.getLogger(FakePidProviderServiceBean.class.getCanonicalName()); + public FakeDOIProvider(String id, String label, String providerAuthority, String providerShoulder, String identifierGenerationStyle, + String datafilePidFormat, String managedList, String excludedList) { + super(id, label, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, managedList, excludedList); + } - //Only need to check locally public boolean isGlobalIdUnique(GlobalId globalId) { try { @@ -29,7 +29,7 @@ public boolean isGlobalIdUnique(GlobalId globalId) { @Override public boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) { - boolean existsLocally = !dvObjectService.isGlobalIdLocallyUnique(globalId); + boolean existsLocally = !pidProviderService.isGlobalIdLocallyUnique(globalId); return existsLocally ? existsLocally : noProviderDefault; } @@ -40,7 +40,7 @@ public boolean registerWhenPublished() { @Override public List getProviderInformation() { - return List.of("FAKE", "https://dataverse.org"); + return List.of(getId(), "https://dataverse.org"); } @Override @@ -65,7 +65,10 @@ public void deleteIdentifier(DvObject dvo) throws Exception { } @Override - public boolean publicizeIdentifier(DvObject studyIn) { + public boolean publicizeIdentifier(DvObject dvObject) { + if(dvObject.isInstanceofDataFile() && dvObject.getGlobalId()==null) { + generatePid(dvObject); + } return true; } @@ -74,4 +77,9 @@ protected String getProviderKeyName() { return "FAKE"; } + @Override + public String getProviderType() { + return TYPE; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/fake/FakeProviderFactory.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/fake/FakeProviderFactory.java new file mode 100644 index 00000000000..292c39d4383 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/fake/FakeProviderFactory.java @@ -0,0 +1,38 @@ +package edu.harvard.iq.dataverse.pidproviders.doi.fake; + +import com.google.auto.service.AutoService; + +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; + +@AutoService(PidProviderFactory.class) +public class FakeProviderFactory implements PidProviderFactory { + + @Override + public PidProvider createPidProvider(String providerId) { + String providerType = JvmSettings.PID_PROVIDER_TYPE.lookup(providerId); + if (!providerType.equals(FakeDOIProvider.TYPE)) { + // Being asked to create a non-Fake provider + return null; + } + String providerLabel = JvmSettings.PID_PROVIDER_LABEL.lookup(providerId); + String providerAuthority = JvmSettings.PID_PROVIDER_AUTHORITY.lookup(providerId); + String providerShoulder = JvmSettings.PID_PROVIDER_SHOULDER.lookupOptional(providerId).orElse(""); + String identifierGenerationStyle = JvmSettings.PID_PROVIDER_IDENTIFIER_GENERATION_STYLE + .lookupOptional(providerId).orElse("randomString"); + String datafilePidFormat = JvmSettings.PID_PROVIDER_DATAFILE_PID_FORMAT.lookupOptional(providerId) + .orElse(SystemConfig.DataFilePIDFormat.DEPENDENT.toString()); + String managedList = 
JvmSettings.PID_PROVIDER_MANAGED_LIST.lookupOptional(providerId).orElse(""); + String excludedList = JvmSettings.PID_PROVIDER_EXCLUDED_LIST.lookupOptional(providerId).orElse(""); + + return new FakeDOIProvider(providerId, providerLabel, providerAuthority, providerShoulder, identifierGenerationStyle, + datafilePidFormat, managedList, excludedList); + } + + public String getType() { + return FakeDOIProvider.TYPE; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java similarity index 87% rename from src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java index 4942db9e7ec..2627bc76fd9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HandlenetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java @@ -18,10 +18,12 @@ Version 3.0. */ -package edu.harvard.iq.dataverse; +package edu.harvard.iq.dataverse.pidproviders.handle; -import edu.harvard.iq.dataverse.settings.JvmSettings; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; import java.io.File; import java.io.FileInputStream; @@ -29,8 +31,6 @@ import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; -import jakarta.ejb.EJB; -import jakarta.ejb.Stateless; import java.security.PrivateKey; /* Handlenet imports: */ @@ -60,23 +60,32 @@ * As of now, it only does the registration updates, to accommodate * the modifyRegistration datasets API sub-command. 
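 * Configuration (the handle index, the independent-service flag, the * authentication handle, and the admin key path/passphrase) arrives via the * constructor; the HandleProviderFactory further below sketches how these * values are looked up per provider id from JvmSettings. 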
*/ -@Stateless -public class HandlenetServiceBean extends AbstractGlobalIdServiceBean { - - @EJB - DataverseServiceBean dataverseService; - @EJB - SettingsServiceBean settingsService; - private static final Logger logger = Logger.getLogger(HandlenetServiceBean.class.getCanonicalName()); +public class HandlePidProvider extends AbstractPidProvider { + + private static final Logger logger = Logger.getLogger(HandlePidProvider.class.getCanonicalName()); public static final String HDL_PROTOCOL = "hdl"; - int handlenetIndex = JvmSettings.HANDLENET_INDEX.lookup(Integer.class); + public static final String TYPE = "hdl"; public static final String HTTP_HDL_RESOLVER_URL = "http://hdl.handle.net/"; public static final String HDL_RESOLVER_URL = "https://hdl.handle.net/"; + + - public HandlenetServiceBean() { - logger.log(Level.FINE,"Constructor"); - configured = true; + int handlenetIndex; + private boolean isIndependentHandleService; + private String authHandle; + private String keyPath; + private String keyPassphrase; + + public HandlePidProvider(String id, String label, String authority, String shoulder, String identifierGenerationStyle, + String datafilePidFormat, String managedList, String excludedList, int index, boolean isIndependentService, String authHandle, String path, String passphrase) { + super(id, label, HDL_PROTOCOL, authority, shoulder, identifierGenerationStyle, datafilePidFormat, managedList, excludedList); + this.handlenetIndex = index; + this.isIndependentHandleService = isIndependentService; + this.authHandle = authHandle; + this.keyPath = path; + this.keyPassphrase = passphrase; + } @Override @@ -231,10 +240,9 @@ private ResolutionRequest buildResolutionRequest(final String handle) { private PublicKeyAuthenticationInfo getAuthInfo(String handlePrefix) { logger.log(Level.FINE,"getAuthInfo"); byte[] key = null; - String adminCredFile = JvmSettings.HANDLENET_KEY_PATH.lookup(); - int handlenetIndex = JvmSettings.HANDLENET_INDEX.lookup(Integer.class); + String adminCredFile = getKeyPath(); - key = readKey(adminCredFile); + key = readKey(adminCredFile); PrivateKey privkey = null; privkey = readPrivKey(key, adminCredFile); String authHandle = getAuthenticationHandle(handlePrefix); @@ -244,8 +252,8 @@ private PublicKeyAuthenticationInfo getAuthInfo(String handlePrefix) { } private String getRegistrationUrl(DvObject dvObject) { logger.log(Level.FINE,"getRegistrationUrl"); - String siteUrl = systemConfig.getDataverseSiteUrl(); - String targetUrl = siteUrl + dvObject.getTargetUrl() + "hdl:" + dvObject.getAuthority() + String siteUrl = SystemConfig.getDataverseSiteUrlStatic(); + String targetUrl = siteUrl + dvObject.getTargetUrl() + "hdl:" + dvObject.getAuthority() + "/" + dvObject.getIdentifier(); return targetUrl; } @@ -278,8 +286,7 @@ private PrivateKey readPrivKey(byte[] key, final String file) { try { byte[] secKey = null; if ( Util.requiresSecretKey(key) ) { - String secret = JvmSettings.HANDLENET_KEY_PASSPHRASE.lookup(); - secKey = secret.getBytes(StandardCharsets.UTF_8); + secKey = getKeyPassphrase().getBytes(StandardCharsets.UTF_8); } key = Util.decrypt(key, secKey); privkey = Util.getPrivateKeyFromBytes(key, 0); @@ -304,9 +311,9 @@ private String getAuthenticationHandle(DvObject dvObject){ private String getAuthenticationHandle(String handlePrefix) { logger.log(Level.FINE,"getAuthenticationHandle"); - if (systemConfig.getHandleAuthHandle()!=null) { - return systemConfig.getHandleAuthHandle(); - } else if (systemConfig.isIndependentHandleService()) { + if 
(getHandleAuthHandle()!=null) { + return getHandleAuthHandle(); + } else if (isIndependentHandleService()) { return handlePrefix + "/ADMIN"; } else { return "0.NA/" + handlePrefix; @@ -348,9 +355,8 @@ public void deleteIdentifier(DvObject dvObject) throws Exception { String handle = getDvObjectHandle(dvObject); String authHandle = getAuthenticationHandle(dvObject); - String adminCredFile = JvmSettings.HANDLENET_KEY_PATH.lookup(); - int handlenetIndex = JvmSettings.HANDLENET_INDEX.lookup(Integer.class); - + String adminCredFile = getKeyPath(); + + byte[] key = readKey(adminCredFile); PrivateKey privkey = readPrivKey(key, adminCredFile); @@ -383,7 +389,7 @@ private boolean updateIdentifierStatus(DvObject dvObject, String statusIn) { @Override public List getProviderInformation(){ - return List.of("Handle", "https://hdl.handle.net"); + return List.of(getId(), HDL_RESOLVER_URL); } @@ -401,7 +407,7 @@ public String createIdentifier(DvObject dvObject) throws Throwable { @Override public boolean publicizeIdentifier(DvObject dvObject) { if (dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty()){ - generateIdentifier(dvObject); + generatePid(dvObject); } return updateIdentifierStatus(dvObject, "public"); @@ -438,6 +444,32 @@ public GlobalId parsePersistentId(String protocol, String authority, String iden public String getUrlPrefix() { return HDL_RESOLVER_URL; } + + @Override + public String getProtocol() { + return HDL_PROTOCOL; + } + + @Override + public String getProviderType() { + return TYPE; + } + + public String getKeyPath() { + return keyPath; + } + + public String getKeyPassphrase() { + return keyPassphrase; + } + + public boolean isIndependentHandleService() { + return isIndependentHandleService; + } + + public String getHandleAuthHandle() { + return authHandle; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandleProviderFactory.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandleProviderFactory.java new file mode 100644 index 00000000000..eef5bed8432 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandleProviderFactory.java @@ -0,0 +1,45 @@ +package edu.harvard.iq.dataverse.pidproviders.handle; + +import com.google.auto.service.AutoService; + +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; + +@AutoService(PidProviderFactory.class) +public class HandleProviderFactory implements PidProviderFactory { + + @Override + public PidProvider createPidProvider(String providerId) { + String providerType = JvmSettings.PID_PROVIDER_TYPE.lookup(providerId); + if (!providerType.equals(HandlePidProvider.TYPE)) { + // Being asked to create a non-Handle provider + return null; + } + String providerLabel = JvmSettings.PID_PROVIDER_LABEL.lookup(providerId); + String providerAuthority = JvmSettings.PID_PROVIDER_AUTHORITY.lookup(providerId); + String providerShoulder = JvmSettings.PID_PROVIDER_SHOULDER.lookupOptional(providerId).orElse(""); + String identifierGenerationStyle = JvmSettings.PID_PROVIDER_IDENTIFIER_GENERATION_STYLE + .lookupOptional(providerId).orElse("randomString"); + String datafilePidFormat = JvmSettings.PID_PROVIDER_DATAFILE_PID_FORMAT.lookupOptional(providerId) + .orElse(SystemConfig.DataFilePIDFormat.DEPENDENT.toString()); + String managedList = 
JvmSettings.PID_PROVIDER_MANAGED_LIST.lookupOptional(providerId).orElse(""); + String excludedList = JvmSettings.PID_PROVIDER_EXCLUDED_LIST.lookupOptional(providerId).orElse(""); + + int index = JvmSettings.HANDLENET_INDEX.lookupOptional(Integer.class, providerId).orElse(300); + boolean independentHandleService = JvmSettings.HANDLENET_INDEPENDENT_SERVICE + .lookupOptional(Boolean.class, providerId).orElse(false); + String handleAuthHandle = JvmSettings.HANDLENET_AUTH_HANDLE.lookup(providerId); + String path = JvmSettings.HANDLENET_KEY_PATH.lookup(providerId); + String passphrase = JvmSettings.HANDLENET_KEY_PASSPHRASE.lookup(providerId); + return new HandlePidProvider(providerId, providerLabel, providerAuthority, providerShoulder, identifierGenerationStyle, + datafilePidFormat, managedList, excludedList, index, independentHandleService, handleAuthHandle, path, + passphrase); + } + + public String getType() { + return HandlePidProvider.TYPE; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedHandlenetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/UnmanagedHandlePidProvider.java similarity index 61% rename from src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedHandlenetServiceBean.java rename to src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/UnmanagedHandlePidProvider.java index c856c5363e0..075e10d8164 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/UnmanagedHandlenetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/UnmanagedHandlePidProvider.java @@ -1,27 +1,32 @@ -package edu.harvard.iq.dataverse.pidproviders; +package edu.harvard.iq.dataverse.pidproviders.handle; -import edu.harvard.iq.dataverse.AbstractGlobalIdServiceBean; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.HandlenetServiceBean; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; + import java.util.*; import java.util.logging.Level; import java.util.logging.Logger; import jakarta.ejb.Stateless; import org.apache.commons.lang3.NotImplementedException; -/** This class is just used to parse Handles that are not managed by any account configured in Dataverse - * It does not implement any of the methods related to PID CRUD +/** + * This class is just used to parse Handles that are not managed by any account + * configured in Dataverse It does not implement any of the methods related to + * PID CRUD * */ -@Stateless -public class UnmanagedHandlenetServiceBean extends AbstractGlobalIdServiceBean { - private static final Logger logger = Logger.getLogger(UnmanagedHandlenetServiceBean.class.getCanonicalName()); +public class UnmanagedHandlePidProvider extends AbstractPidProvider { + + private static final Logger logger = Logger.getLogger(UnmanagedHandlePidProvider.class.getCanonicalName()); + public static final String ID = "UnmanagedHandleProvider"; - public UnmanagedHandlenetServiceBean() { + public UnmanagedHandlePidProvider() { + // Also using ID as label + super(ID, ID, HandlePidProvider.HDL_PROTOCOL); logger.log(Level.FINE, "Constructor"); - configured = true; + } @Override @@ -56,7 +61,7 @@ public void deleteIdentifier(DvObject dvObject) throws Exception { @Override public List getProviderInformation() { - return List.of("UnmanagedHandle", ""); + return List.of(getId(), ""); } @Override @@ -71,19 +76,18 @@ public boolean publicizeIdentifier(DvObject dvObject) { @Override public GlobalId parsePersistentId(String 
pidString) { - if (pidString.startsWith(HandlenetServiceBean.HDL_RESOLVER_URL)) { - pidString = pidString.replace(HandlenetServiceBean.HDL_RESOLVER_URL, - (HandlenetServiceBean.HDL_PROTOCOL + ":")); - } else if (pidString.startsWith(HandlenetServiceBean.HTTP_HDL_RESOLVER_URL)) { - pidString = pidString.replace(HandlenetServiceBean.HTTP_HDL_RESOLVER_URL, - (HandlenetServiceBean.HDL_PROTOCOL + ":")); + if (pidString.startsWith(HandlePidProvider.HDL_RESOLVER_URL)) { + pidString = pidString.replace(HandlePidProvider.HDL_RESOLVER_URL, (HandlePidProvider.HDL_PROTOCOL + ":")); + } else if (pidString.startsWith(HandlePidProvider.HTTP_HDL_RESOLVER_URL)) { + pidString = pidString.replace(HandlePidProvider.HTTP_HDL_RESOLVER_URL, + (HandlePidProvider.HDL_PROTOCOL + ":")); } return super.parsePersistentId(pidString); } @@ -92,7 +96,7 @@ public GlobalId parsePersistentId(String protocol, String identifierString) { - if (!HandlenetServiceBean.HDL_PROTOCOL.equals(protocol)) { + if (!HandlePidProvider.HDL_PROTOCOL.equals(protocol)) { return null; } GlobalId globalId = super.parsePersistentId(protocol, identifierString); @@ -92,7 +96,7 @@ public GlobalId parsePersistentId(String protocol, String authority, String identifier) { - if (!HandlenetServiceBean.HDL_PROTOCOL.equals(protocol)) { + if (!HandlePidProvider.HDL_PROTOCOL.equals(protocol)) { return null; } return super.parsePersistentId(protocol, authority, identifier); @@ -100,6 +104,11 @@ public GlobalId parsePersistentId(String protocol, String authority, String iden @Override public String getUrlPrefix() { - return HandlenetServiceBean.HDL_RESOLVER_URL; + return HandlePidProvider.HDL_RESOLVER_URL; + } + + @Override + public String getProviderType() { + return "unmanagedHandle"; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java new file mode 100644 index 00000000000..91c7f527c88 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java @@ -0,0 +1,201 @@ +package edu.harvard.iq.dataverse.pidproviders.perma; + +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +/** + * PermaLink provider. This is a minimalist permanent ID provider intended for + * use with 'real' datasets/files where the use case nonetheless doesn't lend + * itself to the use of DOIs or Handles, e.g. due to cost, or for a + * catalog/archive where Dataverse has a dataset representing a dataset with + * DOI/handle stored elsewhere + * + * The initial implementation will mint identifiers locally and will provide the + * existing page URLs (using the ?persistentID= format). This will be + * overridable by a configurable parameter to support use of an external + * resolver. 
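 + * + * As an illustrative sketch (the concrete values here are hypothetical, not + * taken from this PR): with base URL https://demo.example.edu, authority DV, + * and an empty separator and shoulder, a minted dataset PID would render as + * perma:DV12AB34 and resolve to + * https://demo.example.edu/citation?persistentId=perma:DV12AB34, matching the + * getUrlPrefix() implementation below.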
+ * + */ +public class PermaLinkPidProvider extends AbstractPidProvider { + + private static final Logger logger = Logger.getLogger(PermaLinkPidProvider.class.getCanonicalName()); + + public static final String PERMA_PROTOCOL = "perma"; + public static final String TYPE = "perma"; + public static final String SEPARATOR = ""; + + // ToDo - remove + @Deprecated + public static final String PERMA_RESOLVER_URL = JvmSettings.PERMALINK_BASE_URL.lookupOptional("permalink") + .orElse(SystemConfig.getDataverseSiteUrlStatic()); + + + private String separator = SEPARATOR; + + private String baseUrl; + + public PermaLinkPidProvider(String id, String label, String providerAuthority, String providerShoulder, String identifierGenerationStyle, + String datafilePidFormat, String managedList, String excludedList, String baseUrl, String separator) { + super(id, label, PERMA_PROTOCOL, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, + managedList, excludedList); + this.baseUrl = baseUrl; + this.separator = separator; + } + + @Override + public String getSeparator() { + return separator; + } + + @Override + public boolean alreadyRegistered(GlobalId globalId, boolean noProviderDefault) { + // Perma doesn't manage registration, so we assume all local PIDs can be treated + // as registered + boolean existsLocally = !pidProviderService.isGlobalIdLocallyUnique(globalId); + return existsLocally ? existsLocally : noProviderDefault; + } + + @Override + public boolean registerWhenPublished() { + return false; + } + + @Override + public List getProviderInformation() { + return List.of(getId(), getBaseUrl()); + } + + @Override + public String createIdentifier(DvObject dvo) throws Throwable { + // Call external resolver and send landing URL? + // FWIW: Return value appears to only be used in RegisterDvObjectCommand where + // success requires finding the dvo identifier in this string. (Also logged a + // couple places). + return (dvo.getGlobalId().asString()); + } + + @Override + public Map getIdentifierMetadata(DvObject dvo) { + Map map = new HashMap<>(); + return map; + } + + @Override + public String modifyIdentifierTargetURL(DvObject dvo) throws Exception { + return getTargetUrl(dvo); + } + + @Override + public void deleteIdentifier(DvObject dvo) throws Exception { + // no-op + } + + @Override + public boolean publicizeIdentifier(DvObject dvObject) { + // Generate if needed (i.e. datafile case where we don't create/register early + // (even with registerWhenPublished == false)) + if (dvObject.getIdentifier() == null || dvObject.getIdentifier().isEmpty()) { + dvObject = generatePid(dvObject); + } + // Call external resolver and send landing URL? 
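 + // Nothing needs to be registered with an external service here: for + // permalinks, alreadyRegistered() above treats any locally known PID as + // registered, so publication only requires that the identifier exist + // (generated just above when missing).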
+ return true; + } + + @Override + public GlobalId parsePersistentId(String pidString) { + // ToDo - handle local PID resolver for dataset/file + logger.info("Parsing in Perma: " + pidString); + if (pidString.startsWith(getUrlPrefix())) { + pidString = pidString.replace(getUrlPrefix(), (PERMA_PROTOCOL + ":")); + } + return super.parsePersistentId(pidString); + } + + @Override + public GlobalId parsePersistentId(String protocol, String identifierString) { + logger.info("Checking Perma: " + identifierString); + if (!PERMA_PROTOCOL.equals(protocol)) { + return null; + } + String cleanIdentifier = PERMA_PROTOCOL + ":" + identifierString; + // With permalinks, we have to check the sets before parsing since the permalinks in these sets could have different authority, spearator, and shoulders + if (getExcludedSet().contains(cleanIdentifier)) { + return null; + } + if(getManagedSet().contains(cleanIdentifier)) { + /** With a variable separator that could also be empty, there is no way to determine the authority and shoulder for an unknown permalink. + * Since knowing this split isn't relevant for permalinks except for minting, the code below just assumes the authority + * is the first 4 characters and that the separator and the shoulder are empty. + * If this is found to cause issues, users should be able to use a managed permalink provider as a work-around. The code here could + * be changed to allow default lengths for the authority, separator, and shoulder and/or to add a list of known (but unmanaged) authority, separator, shoulder combos. + */ + if(identifierString.length() < 4) { + return new GlobalId(protocol, "", identifierString, SEPARATOR, getUrlPrefix(), + getId()); + } + return new GlobalId(protocol, identifierString.substring(0,4), identifierString.substring(4), SEPARATOR, getUrlPrefix(), + getId()); + } + String identifier = null; + if (getAuthority() != null) { + if (identifierString.startsWith(getAuthority())) { + identifier = identifierString.substring(getAuthority().length()); + } else { + //Doesn't match authority + return null; + } + if (identifier.startsWith(separator)) { + identifier = identifier.substring(separator.length()); + } else { + //Doesn't match separator + return null; + } + } + identifier = PidProvider.formatIdentifierString(identifier); + if (PidProvider.testforNullTerminator(identifier)) { + return null; + } + if(!identifier.startsWith(getShoulder())) { + //Doesn't match shoulder + return null; + } + return new GlobalId(PERMA_PROTOCOL, getAuthority(), identifier, separator, getUrlPrefix(), getId()); + } + + @Override + public GlobalId parsePersistentId(String protocol, String authority, String identifier) { + if (!PERMA_PROTOCOL.equals(protocol)) { + return null; + } + return super.parsePersistentId(protocol, authority, identifier); + } + + @Override + public String getUrlPrefix() { + + return getBaseUrl() + "/citation?persistentId=" + PERMA_PROTOCOL + ":"; + } + + @Override + public String getProtocol() { + return PERMA_PROTOCOL; + } + + @Override + public String getProviderType() { + return PERMA_PROTOCOL; + } + + public String getBaseUrl() { + return baseUrl; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkProviderFactory.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkProviderFactory.java new file mode 100644 index 00000000000..32b89223062 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkProviderFactory.java @@ -0,0 +1,43 @@ +package 
edu.harvard.iq.dataverse.pidproviders.perma; + +import com.google.auto.service.AutoService; + +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactory; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; + +@AutoService(PidProviderFactory.class) +public class PermaLinkProviderFactory implements PidProviderFactory { + + @Override + public PidProvider createPidProvider(String providerId) { + String providerType = JvmSettings.PID_PROVIDER_TYPE.lookup(providerId); + if (!providerType.equals(PermaLinkPidProvider.TYPE)) { + // Being asked to create a non-PermaLink provider + return null; + } + String providerLabel = JvmSettings.PID_PROVIDER_LABEL.lookup(providerId); + String providerAuthority = JvmSettings.PID_PROVIDER_AUTHORITY.lookup(providerId); + String providerShoulder = JvmSettings.PID_PROVIDER_SHOULDER.lookupOptional(providerId).orElse(""); + String identifierGenerationStyle = JvmSettings.PID_PROVIDER_IDENTIFIER_GENERATION_STYLE + .lookupOptional(providerId).orElse("randomString"); + String datafilePidFormat = JvmSettings.PID_PROVIDER_DATAFILE_PID_FORMAT.lookupOptional(providerId) + .orElse(SystemConfig.DataFilePIDFormat.DEPENDENT.toString()); + String managedList = JvmSettings.PID_PROVIDER_MANAGED_LIST.lookupOptional(providerId).orElse(""); + String excludedList = JvmSettings.PID_PROVIDER_EXCLUDED_LIST.lookupOptional(providerId).orElse(""); + + String baseUrl = JvmSettings.PERMALINK_BASE_URL.lookupOptional(providerId) + .orElse(SystemConfig.getDataverseSiteUrlStatic()); + String separator = JvmSettings.PERMALINK_SEPARATOR.lookupOptional(providerId).orElse(""); + + return new PermaLinkPidProvider(providerId, providerLabel, providerAuthority, providerShoulder, identifierGenerationStyle, + datafilePidFormat, managedList, excludedList, baseUrl, separator); + } + + public String getType() { + return PermaLinkPidProvider.TYPE; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/UnmanagedPermaLinkPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/UnmanagedPermaLinkPidProvider.java new file mode 100644 index 00000000000..b7961a41c50 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/UnmanagedPermaLinkPidProvider.java @@ -0,0 +1,114 @@ +package edu.harvard.iq.dataverse.pidproviders.perma; + +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.util.SystemConfig; + +import java.util.*; +import java.util.logging.Level; +import java.util.logging.Logger; +import jakarta.ejb.Stateless; +import org.apache.commons.lang3.NotImplementedException; + +/** This class is just used to parse PermaLinks that are not managed by any account configured in Dataverse. + * It does not implement any of the methods related to PID CRUD. + * + */ +public class UnmanagedPermaLinkPidProvider extends AbstractPidProvider { + + private static final Logger logger = Logger.getLogger(UnmanagedPermaLinkPidProvider.class.getCanonicalName()); + public static final String ID = "UnmanagedPermaLinkProvider"; + + public UnmanagedPermaLinkPidProvider() { + // Also using ID as label + super(ID, ID, PermaLinkPidProvider.PERMA_PROTOCOL); + logger.log(Level.FINE, "Constructor"); + } + + @Override + public boolean canManagePID() { + return false; + } + + @Override + public boolean registerWhenPublished() { + throw new 
NotImplementedException(); + } + + @Override + public boolean alreadyRegistered(GlobalId pid, boolean noProviderDefault) throws Exception { + throw new NotImplementedException(); + } + + @Override + public Map getIdentifierMetadata(DvObject dvObject) { + throw new NotImplementedException(); + } + + @Override + public String modifyIdentifierTargetURL(DvObject dvObject) throws Exception { + throw new NotImplementedException(); + } + + @Override + public void deleteIdentifier(DvObject dvObject) throws Exception { + throw new NotImplementedException(); + } + + @Override + public List getProviderInformation() { + return List.of(getId(), ""); + } + + @Override + public String createIdentifier(DvObject dvObject) throws Throwable { + throw new NotImplementedException(); + } + + @Override + public boolean publicizeIdentifier(DvObject dvObject) { + throw new NotImplementedException(); + } + + @Override + public GlobalId parsePersistentId(String protocol, String identifierString) { + if (!PermaLinkPidProvider.PERMA_PROTOCOL.equals(protocol)) { + return null; + } + /** With a variable separator that could also be empty, there is no way to determine the authority and shoulder for an unknown/unmanaged permalink. + * Since knowing this split isn't relevant for unmanaged permalinks, the code below just assumes the authority + * is the first 4 characters and that the separator and the shoulder are empty. + * If this is found to cause issues, users should be able to use a managed permalink provider as a work-around. The code here could + * be changed to allow default lengths for the authority, separator, and shoulder and/or to add a list of known (but unmanaged) authority, separator, shoulder combos. + */ + if(identifierString.length() < 4) { + logger.warning("A short unmanaged permalink was found - assuming the authority is empty: " + identifierString); + return super.parsePersistentId(protocol, "", identifierString); + } + return super.parsePersistentId(protocol, identifierString.substring(0, 4), identifierString.substring(4)); + } + + @Override + public GlobalId parsePersistentId(String protocol, String authority, String identifier) { + if (!PermaLinkPidProvider.PERMA_PROTOCOL.equals(protocol)) { + return null; + } + return super.parsePersistentId(protocol, authority, identifier); + } + + @Override + public String getUrlPrefix() { + return SystemConfig.getDataverseSiteUrlStatic()+ "/citation?persistentId=" + PermaLinkPidProvider.PERMA_PROTOCOL + ":"; + } + + @Override + public String getProviderType() { + return PermaLinkPidProvider.TYPE; + } + + @Override + public String getSeparator() { + return PermaLinkPidProvider.SEPARATOR; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerPage.java b/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerPage.java index c252d2e3330..9edb536eda2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerPage.java @@ -11,6 +11,7 @@ import jakarta.inject.Named; import jakarta.json.JsonArray; +@Deprecated(forRemoval = true, since = "2024-07-07") @Stateless @Named public class RepositoryStorageAbstractionLayerPage { @@ -22,17 +23,20 @@ public class RepositoryStorageAbstractionLayerPage { @EJB StorageSiteServiceBean storageSiteServiceBean; + @Deprecated(forRemoval = 
true, since = "2024-07-07") public String getLocalDataAccessDirectory(DatasetVersion datasetVersion) { String localDataAccessParentDir = settingsService.getValueForKey(SettingsServiceBean.Key.LocalDataAccessPath); return RepositoryStorageAbstractionLayerUtil.getLocalDataAccessDirectory(localDataAccessParentDir, datasetVersion.getDataset()); } + @Deprecated(forRemoval = true, since = "2024-07-07") public List getRsyncSites(DatasetVersion datasetVersion) { List storageSites = storageSiteServiceBean.findAll(); JsonArray storageSitesAsJson = RepositoryStorageAbstractionLayerUtil.getStorageSitesAsJson(storageSites); return RepositoryStorageAbstractionLayerUtil.getRsyncSites(datasetVersion.getDataset(), storageSitesAsJson); } + @Deprecated(forRemoval = true, since = "2024-07-07") public String getVerifyDataCommand(DatasetVersion datasetVersion) { return RepositoryStorageAbstractionLayerUtil.getVerifyDataCommand(datasetVersion.getDataset()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerUtil.java b/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerUtil.java index 8501fba3ce0..0d547402676 100644 --- a/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/repositorystorageabstractionlayer/RepositoryStorageAbstractionLayerUtil.java @@ -13,10 +13,12 @@ import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; +@Deprecated(forRemoval = true, since = "2024-07-07") public class RepositoryStorageAbstractionLayerUtil { private static final Logger logger = Logger.getLogger(RepositoryStorageAbstractionLayerUtil.class.getCanonicalName()); + @Deprecated(forRemoval = true, since = "2024-07-07") public static List getRsyncSites(Dataset dataset, JsonArray rsalSitesAsJson) { List rsalSites = new ArrayList<>(); boolean leafDirectoryOnly = false; @@ -30,6 +32,7 @@ public static List getRsyncSites(Dataset dataset, JsonArray rsalSites return rsalSites; } + @Deprecated(forRemoval = true, since = "2024-07-07") static String getLocalDataAccessDirectory(String localDataAccessParentDir, Dataset dataset) { if (localDataAccessParentDir == null) { localDataAccessParentDir = File.separator + "UNCONFIGURED ( " + SettingsServiceBean.Key.LocalDataAccessPath + " )"; @@ -38,6 +41,7 @@ static String getLocalDataAccessDirectory(String localDataAccessParentDir, Datas return localDataAccessParentDir + File.separator + getDirectoryContainingTheData(dataset, leafDirectoryOnly); } + @Deprecated(forRemoval = true, since = "2024-07-07") static String getVerifyDataCommand(Dataset dataset) { boolean leafDirectoryOnly = true; // TODO: if "files.sha" is defined somewhere, use it. @@ -51,6 +55,7 @@ static String getVerifyDataCommand(Dataset dataset) { * leafDirectoryOnly. See also * http://www.gnu.org/software/coreutils/manual/html_node/basename-invocation.html */ + @Deprecated(forRemoval = true, since = "2024-07-07") public static String getDirectoryContainingTheData(Dataset dataset, boolean leafDirectoryOnly) { /** * FIXME: What if there is more than one package in the dataset? @@ -81,6 +86,7 @@ public static String getDirectoryContainingTheData(Dataset dataset, boolean leaf * RSAL or some other "big data" component live for a list of remotes sites * to which a particular dataset is replicated to. 
*/ + @Deprecated(forRemoval = true, since = "2024-07-07") static JsonArray getStorageSitesAsJson(List storageSites) { JsonArrayBuilder arraybuilder = Json.createArrayBuilder(); if (storageSites == null || storageSites.isEmpty()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index d6d0be7a17b..26b42734d19 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -1,27 +1,7 @@ package edu.harvard.iq.dataverse.search; -import edu.harvard.iq.dataverse.ControlledVocabularyValue; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.DataFileTag; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; -import edu.harvard.iq.dataverse.DatasetFieldConstant; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetLinkingServiceBean; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseLinkingServiceBean; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.DvObjectServiceBean; -import edu.harvard.iq.dataverse.Embargo; -import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.PermissionServiceBean; +import edu.harvard.iq.dataverse.*; +import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; import edu.harvard.iq.dataverse.batch.util.LoggingUtil; @@ -33,6 +13,8 @@ import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil; import edu.harvard.iq.dataverse.datavariable.VariableServiceBean; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.search.IndexableDataset.DatasetState; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -56,7 +38,9 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Future; +import java.util.concurrent.Semaphore; import java.util.function.Function; +import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; import jakarta.annotation.PostConstruct; @@ -68,6 +52,8 @@ import jakarta.ejb.Stateless; import jakarta.ejb.TransactionAttribute; import static jakarta.ejb.TransactionAttributeType.REQUIRES_NEW; + +import jakarta.inject.Inject; import jakarta.inject.Named; import jakarta.json.JsonObject; import jakarta.persistence.EntityManager; @@ -92,6 +78,9 @@ import org.apache.tika.sax.BodyContentHandler; import org.eclipse.microprofile.config.Config; import org.eclipse.microprofile.config.ConfigProvider; +import org.eclipse.microprofile.metrics.MetricUnits; +import org.eclipse.microprofile.metrics.Timer; +import org.eclipse.microprofile.metrics.annotation.Metric; import 
org.xml.sax.ContentHandler; @Stateless @@ -229,6 +218,9 @@ public Future<String> indexDataverse(Dataverse dataverse, boolean processPaths) solrInputDocument.addField(SearchFields.DATAVERSE_CATEGORY, dataverse.getIndexableCategoryName()); if (dataverse.isReleased()) { solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING); + if (FeatureFlags.ADD_PUBLICOBJECT_SOLR_FIELD.enabled()) { + solrInputDocument.addField(SearchFields.PUBLIC_OBJECT, true); + } solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataverse.getPublicationDate()); } else { solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING); @@ -332,14 +324,6 @@ public Future<String> indexDataverse(Dataverse dataverse, boolean processPaths) logger.info(status); return new AsyncResult<>(status); } - try { - solrClientService.getSolrClient().commit(); - } catch (SolrServerException | IOException ex) { - status = ex.toString(); - logger.info(status); - return new AsyncResult<>(status); - } - dvObjectService.updateContentIndexTime(dataverse); IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(dataverse); String msg = "indexed dataverse " + dataverse.getId() + ":" + dataverse.getAlias() + ". Response from permission indexing: " + indexResponse.getMessage(); @@ -362,6 +346,29 @@ public void indexDatasetInNewTransaction(Long datasetId) { //Dataset dataset) { private static final Map<Long, Dataset> NEXT_TO_INDEX = new ConcurrentHashMap<>(); // indexingNow is a set of dataset ids of datasets being indexed asynchronously right now private static final Map<Long, Boolean> INDEXING_NOW = new ConcurrentHashMap<>(); + // semaphore for async indexing + private static final Semaphore ASYNC_INDEX_SEMAPHORE = new Semaphore(JvmSettings.MAX_ASYNC_INDEXES.lookupOptional(Integer.class).orElse(4), true); + + @Inject + @Metric(name = "index_permit_wait_time", absolute = true, unit = MetricUnits.NANOSECONDS, + description = "Displays how long it takes to receive a permit to index a dataset") + Timer indexPermitWaitTimer; + + @Inject + @Metric(name = "index_time", absolute = true, unit = MetricUnits.NANOSECONDS, + description = "Displays how long it takes to index a dataset") + Timer indexTimer; + + /** + * Try to acquire a permit from the semaphore, avoiding too many parallel indexing jobs that could overwhelm Solr. + * This method will time the duration waiting for the permit, allowing indexing performance to be measured. + * @throws InterruptedException + */ + private void acquirePermitFromSemaphore() throws InterruptedException { + try (var timeContext = indexPermitWaitTimer.time()) { + ASYNC_INDEX_SEMAPHORE.acquire(); + } + } // When you pass null as Dataset parameter to this method, it indicates that the indexing of the dataset with "id" has finished // Pass non-null Dataset to schedule it for indexing @@ -406,10 +413,24 @@ synchronized private static Dataset getNextToIndex(Long id, Dataset d) { */ @Asynchronous public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { + try { + acquirePermitFromSemaphore(); + doAsyncIndexDataset(dataset, doNormalSolrDocCleanUp); + } catch (InterruptedException e) { + String failureLogText = "Indexing failed: interrupted. 
You can kick off a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); + failureLogText += "\r\n" + e.getLocalizedMessage(); + LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); + } finally { + ASYNC_INDEX_SEMAPHORE.release(); + } + } + + private void doAsyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { Long id = dataset.getId(); Dataset next = getNextToIndex(id, dataset); // if there is an ongoing index job for this dataset, next is null (ongoing index job will reindex the newest version after current indexing finishes) while (next != null) { - try { + // Time context will automatically start on creation and stop when leaving the try block + try (var timeContext = indexTimer.time()) { indexDataset(next, doNormalSolrDocCleanUp); } catch (Exception e) { // catch all possible exceptions; otherwise when something unexpected happens the dataset would remain locked and impossible to reindex String failureLogText = "Indexing failed. You can kick off a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); @@ -420,9 +441,19 @@ public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { } } + @Asynchronous public void asyncIndexDatasetList(List<Dataset> datasets, boolean doNormalSolrDocCleanUp) { for(Dataset dataset : datasets) { - asyncIndexDataset(dataset, true); + try { + acquirePermitFromSemaphore(); + doAsyncIndexDataset(dataset, true); + } catch (InterruptedException e) { + String failureLogText = "Indexing failed: interrupted. You can kick off a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); + failureLogText += "\r\n" + e.getLocalizedMessage(); + LoggingUtil.writeOnSuccessFailureLog(null, failureLogText, dataset); + } finally { + ASYNC_INDEX_SEMAPHORE.release(); + } } } @@ -434,7 +465,7 @@ public void indexDvObject(DvObject objectIn) throws SolrServerException, IOExce } } - private void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { + public void indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) throws SolrServerException, IOException { doIndexDataset(dataset, doNormalSolrDocCleanUp); updateLastIndexedTime(dataset.getId()); } @@ -445,94 +476,160 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr * @todo should we use solrDocIdentifierDataset or * IndexableObject.IndexableTypes.DATASET.getName() + "_" ? 
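 * For orientation, a sketch based on the suffix handling in this class (not * text from the PR): the resulting doc ids look like dataset_123 for the * published card, dataset_123_draft for a draft, and dataset_123_deaccessioned * for a deaccessioned version; file docs use the datafile_ prefix, and the * corresponding permission docs append _permission.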
*/ - // String solrIdPublished = solrDocIdentifierDataset + dataset.getId(); String solrIdPublished = determinePublishedDatasetSolrDocId(dataset); String solrIdDraftDataset = IndexableObject.IndexableTypes.DATASET.getName() + "_" + dataset.getId() + IndexableDataset.DatasetState.WORKING_COPY.getSuffix(); - // String solrIdDeaccessioned = IndexableObject.IndexableTypes.DATASET.getName() - // + "_" + dataset.getId() + - // IndexableDataset.DatasetState.DEACCESSIONED.getSuffix(); String solrIdDeaccessioned = determineDeaccessionedDatasetId(dataset); StringBuilder debug = new StringBuilder(); debug.append("\ndebug:\n"); - int numPublishedVersions = 0; - List versions = dataset.getVersions(); - List solrIdsOfFilesToDelete = new ArrayList<>(); - for (DatasetVersion datasetVersion : versions) { - Long versionDatabaseId = datasetVersion.getId(); - String versionTitle = datasetVersion.getTitle(); - String semanticVersion = datasetVersion.getSemanticVersion(); - DatasetVersion.VersionState versionState = datasetVersion.getVersionState(); - if (versionState.equals(DatasetVersion.VersionState.RELEASED)) { - numPublishedVersions += 1; - } - debug.append("version found with database id " + versionDatabaseId + "\n"); - debug.append("- title: " + versionTitle + "\n"); - debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); - List fileMetadatas = datasetVersion.getFileMetadatas(); - List fileInfo = new ArrayList<>(); - for (FileMetadata fileMetadata : fileMetadatas) { - String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); - /** - * It sounds weird but the first thing we'll do is preemptively - * delete the Solr documents of all published files. Don't - * worry, published files will be re-indexed later along with - * the dataset. We do this so users can delete files from - * published versions of datasets and then re-publish a new - * version without fear that their old published files (now - * deleted from the latest published version) will be - * searchable. See also - * https://github.com/IQSS/dataverse/issues/762 - */ - solrIdsOfFilesToDelete.add(solrIdOfPublishedFile); - fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); - } - try { - /** - * Preemptively delete *all* Solr documents for files associated - * with the dataset based on a Solr query. - * - * We must query Solr for this information because the file has - * been deleted from the database ( perhaps when Solr was down, - * as reported in https://github.com/IQSS/dataverse/issues/2086 - * ) so the database doesn't even know about the file. It's an - * orphan. - * - * @todo This Solr query should make the iteration above based - * on the database unnecessary because it the Solr query should - * find all files for the dataset. We can probably remove the - * iteration above after an "index all" has been performed. - * Without an "index all" we won't be able to find files based - * on parentId because that field wasn't searchable in 4.0. - * - * @todo We should also delete the corresponding Solr - * "permission" documents for the files. 
- */ - List allFilesForDataset = findFilesOfParentDataset(dataset.getId()); - solrIdsOfFilesToDelete.addAll(allFilesForDataset); - } catch (SearchException | NullPointerException ex) { - logger.fine("could not run search of files to delete: " + ex); + boolean reduceSolrDeletes = FeatureFlags.REDUCE_SOLR_DELETES.enabled(); + if (!reduceSolrDeletes) { + int numPublishedVersions = 0; + List versions = dataset.getVersions(); + List solrIdsOfFilesToDelete = new ArrayList<>(); + for (DatasetVersion datasetVersion : versions) { + Long versionDatabaseId = datasetVersion.getId(); + String versionTitle = datasetVersion.getTitle(); + String semanticVersion = datasetVersion.getSemanticVersion(); + DatasetVersion.VersionState versionState = datasetVersion.getVersionState(); + if (versionState.equals(DatasetVersion.VersionState.RELEASED)) { + numPublishedVersions += 1; + } + debug.append("version found with database id " + versionDatabaseId + "\n"); + debug.append("- title: " + versionTitle + "\n"); + debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); + List fileMetadatas = datasetVersion.getFileMetadatas(); + List fileInfo = new ArrayList<>(); + for (FileMetadata fileMetadata : fileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); + /** + * It sounds weird but the first thing we'll do is preemptively + * delete the Solr documents of all published files. Don't + * worry, published files will be re-indexed later along with + * the dataset. We do this so users can delete files from + * published versions of datasets and then re-publish a new + * version without fear that their old published files (now + * deleted from the latest published version) will be + * searchable. See also + * https://github.com/IQSS/dataverse/issues/762 + */ + solrIdsOfFilesToDelete.add(solrIdOfPublishedFile); + fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); + } + try { + /** + * Preemptively delete *all* Solr documents for files associated + * with the dataset based on a Solr query. + * + * We must query Solr for this information because the file has + * been deleted from the database ( perhaps when Solr was down, + * as reported in https://github.com/IQSS/dataverse/issues/2086 + * ) so the database doesn't even know about the file. It's an + * orphan. + * + * @todo This Solr query should make the iteration above based + * on the database unnecessary because it the Solr query should + * find all files for the dataset. We can probably remove the + * iteration above after an "index all" has been performed. + * Without an "index all" we won't be able to find files based + * on parentId because that field wasn't searchable in 4.0. + * + * @todo We should also delete the corresponding Solr + * "permission" documents for the files. 
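 + * + * Note that this preemptive sweep only runs when the reduce-solr-deletes + * feature flag is off; when the flag is on, the branch further below assembles + * a targeted list of stale doc ids to delete instead.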
+ */ + List<String> allFilesForDataset = findFilesOfParentDataset(dataset.getId()); + solrIdsOfFilesToDelete.addAll(allFilesForDataset); + } catch (SearchException | NullPointerException ex) { + logger.fine("could not run search of files to delete: " + ex); + } + int numFiles = 0; + if (fileMetadatas != null) { + numFiles = fileMetadatas.size(); + } + debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); } - int numFiles = 0; - if (fileMetadatas != null) { - numFiles = fileMetadatas.size(); + debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); + if (doNormalSolrDocCleanUp) { + IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete); + debug.append("result of attempt to preemptively delete published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); } - debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); - } - debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); - if (doNormalSolrDocCleanUp) { - IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete); - debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); } DatasetVersion latestVersion = dataset.getLatestVersion(); - String latestVersionStateString = latestVersion.getVersionState().name(); DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState(); + String latestVersionStateString = latestVersionState.name(); DatasetVersion releasedVersion = dataset.getReleasedVersion(); boolean atLeastOnePublishedVersion = false; if (releasedVersion != null) { atLeastOnePublishedVersion = true; - } else { - atLeastOnePublishedVersion = false; } + if (reduceSolrDeletes) { + List<String> solrIdsOfDocsToDelete = null; + if (logger.isLoggable(Level.FINE)) { + writeDebugInfo(debug, dataset); + } + if (doNormalSolrDocCleanUp) { + try { + solrIdsOfDocsToDelete = findFilesOfParentDataset(dataset.getId()); + logger.fine("Existing file docs: " + String.join(", ", solrIdsOfDocsToDelete)); + if (!solrIdsOfDocsToDelete.isEmpty()) { + // We keep the latest version's docs unless it is deaccessioned and there is no + // published/released version + // So skip the loop removing those docs from the delete list except in that case + if ((!latestVersion.isDeaccessioned() || atLeastOnePublishedVersion)) { + List<FileMetadata> latestFileMetadatas = latestVersion.getFileMetadatas(); + String suffix = (new IndexableDataset(latestVersion)).getDatasetState().getSuffix(); + for (FileMetadata fileMetadata : latestFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + + fileMetadata.getDataFile().getId() + suffix; + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } + } + if (releasedVersion != null && !releasedVersion.equals(latestVersion)) { + List<FileMetadata> releasedFileMetadatas = releasedVersion.getFileMetadatas(); + for (FileMetadata fileMetadata : releasedFileMetadatas) { + String solrIdOfPublishedFile = solrDocIdentifierFile + + fileMetadata.getDataFile().getId(); + solrIdsOfDocsToDelete.remove(solrIdOfPublishedFile); + } + } + } + // Clear any unused dataset docs + if (!latestVersion.isDraft()) { + // The latest version is released, so should delete any draft docs for the + // dataset + solrIdsOfDocsToDelete.add(solrIdDraftDataset); + } + if (!atLeastOnePublishedVersion) { + // There's no released version, so should 
delete any normal state docs for the + // dataset + solrIdsOfDocsToDelete.add(solrIdPublished); + } + if (atLeastOnePublishedVersion || !latestVersion.isDeaccessioned()) { + // There's a released version or a draft, so should delete any deaccessioned + // state docs for the dataset + solrIdsOfDocsToDelete.add(solrIdDeaccessioned); + } + } catch (SearchException | NullPointerException ex) { + logger.fine("could not run search of files to delete: " + ex); + } + logger.fine("Solr docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); + + if (!solrIdsOfDocsToDelete.isEmpty()) { + List solrIdsOfPermissionDocsToDelete = new ArrayList<>(); + for (String file : solrIdsOfDocsToDelete) { + // Also remove associated permission docs + solrIdsOfPermissionDocsToDelete.add(file + discoverabilityPermissionSuffix); + } + solrIdsOfDocsToDelete.addAll(solrIdsOfPermissionDocsToDelete); + logger.fine("Solr docs and perm docs to delete: " + String.join(", ", solrIdsOfDocsToDelete)); + + IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService + .deleteMultipleSolrIds(solrIdsOfDocsToDelete); + debug.append("result of attempt to premptively deleted published files before reindexing: " + + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); + } + } + } + Map desiredCards = new LinkedHashMap<>(); /** * @todo refactor all of this below and have a single method that takes @@ -555,7 +652,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr .append(indexDraftResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - if (doNormalSolrDocCleanUp) { + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { String deleteDeaccessionedResult = removeDeaccessioned(dataset); results.append("Draft exists, no need for deaccessioned version. Deletion attempted for ") .append(solrIdDeaccessioned).append(" (and files). Result: ") @@ -563,7 +660,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr } desiredCards.put(DatasetVersion.VersionState.RELEASED, false); - if (doNormalSolrDocCleanUp) { + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { String deletePublishedResults = removePublished(dataset); results.append("No published version. Attempting to delete traces of published version from index. Result: ") .append(deletePublishedResults).append("\n"); @@ -606,13 +703,13 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr results.append("No draft version. Attempting to index as deaccessioned. Result: ").append(indexDeaccessionedVersionResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.RELEASED, false); - if (doNormalSolrDocCleanUp) { + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { String deletePublishedResults = removePublished(dataset); results.append("No published version. Attempting to delete traces of published version from index. 
Result: ").append(deletePublishedResults).append("\n"); } desiredCards.put(DatasetVersion.VersionState.DRAFT, false); - if (doNormalSolrDocCleanUp) { + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); @@ -660,7 +757,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr results.append("Attempted to index " + solrIdPublished).append(". Result: ").append(indexReleasedVersionResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DRAFT, false); - if (doNormalSolrDocCleanUp) { + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { List solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); @@ -669,7 +766,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr } desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - if (doNormalSolrDocCleanUp) { + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { String deleteDeaccessionedResult = removeDeaccessioned(dataset); results.append("No need for deaccessioned version. Deletion attempted for ") .append(solrIdDeaccessioned).append(". Result: ").append(deleteDeaccessionedResult); @@ -720,7 +817,7 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr .append(solrIdDraftDataset).append(" (limited visibility). Result: ").append(indexDraftResult).append("\n"); desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); - if (doNormalSolrDocCleanUp) { + if (!reduceSolrDeletes && doNormalSolrDocCleanUp) { String deleteDeaccessionedResult = removeDeaccessioned(dataset); results.append("No need for deaccessioned version. Deletion attempted for ") .append(solrIdDeaccessioned).append(". 
Result: ").append(deleteDeaccessionedResult); @@ -762,11 +859,42 @@ private void doIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) thr } } - private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { - String deleteDraftFilesResults = ""; - IndexResponse indexResponse = solrIndexService.deleteMultipleSolrIds(solrDocIdsForDraftFilesToDelete); - deleteDraftFilesResults = indexResponse.toString(); - return deleteDraftFilesResults; + private void writeDebugInfo(StringBuilder debug, Dataset dataset) { + List versions = dataset.getVersions(); + int numPublishedVersions = 0; + for (DatasetVersion datasetVersion : versions) { + Long versionDatabaseId = datasetVersion.getId(); + String versionTitle = datasetVersion.getTitle(); + String semanticVersion = datasetVersion.getSemanticVersion(); + DatasetVersion.VersionState versionState = datasetVersion.getVersionState(); + if (versionState.equals(DatasetVersion.VersionState.RELEASED)) { + numPublishedVersions += 1; + } + debug.append("version found with database id " + versionDatabaseId + "\n"); + debug.append("- title: " + versionTitle + "\n"); + debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); + List fileInfo = new ArrayList<>(); + List fileMetadatas = datasetVersion.getFileMetadatas(); + + for (FileMetadata fileMetadata : fileMetadatas) { + /** + * It sounds weird but the first thing we'll do is preemptively delete the Solr + * documents of all published files. Don't worry, published files will be + * re-indexed later along with the dataset. We do this so users can delete files + * from published versions of datasets and then re-publish a new version without + * fear that their old published files (now deleted from the latest published + * version) will be searchable. 
See also + * https://github.com/IQSS/dataverse/issues/762 + */ + fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); + } + int numFiles = 0; + if (fileMetadatas != null) { + numFiles = fileMetadatas.size(); + } + debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); + } + debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); } private IndexResponse indexDatasetPermissions(Dataset dataset) { @@ -803,17 +931,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set langs = settingsService.getConfiguredLanguages(); Map cvocMap = datasetFieldService.getCVocConf(true); + Map> cvocManagedFieldMap = new HashMap<>(); + for (Map.Entry cvocEntry : cvocMap.entrySet()) { + if(cvocEntry.getValue().containsKey("managed-fields")) { + JsonObject managedFields = cvocEntry.getValue().getJsonObject("managed-fields"); + Set managedFieldValues = new HashSet<>(); + for (String s : managedFields.keySet()) { + managedFieldValues.add(managedFields.getString(s)); + } + cvocManagedFieldMap.put(cvocEntry.getKey(), managedFieldValues); + } + } + + + Set metadataBlocksWithValue = new HashSet<>(); for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) { @@ -963,36 +1107,73 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set vals = dsf.getValues_nondisplay(); - Set searchStrings = new HashSet(); + Set searchStrings = new HashSet<>(); for (String val: vals) { searchStrings.add(val); - searchStrings.addAll(datasetFieldService.getStringsFor(val)); + // Try to get string values from externalvocabularyvalue using val as termUri + searchStrings.addAll(datasetFieldService.getIndexableStringsByTermUri(val, cvocMap.get(dsfType.getId()), dsfType.getName())); + + if(dsfType.getParentDatasetFieldType()!=null) { + List childDatasetFields = dsf.getParentDatasetFieldCompoundValue().getChildDatasetFields(); + for (DatasetField df : childDatasetFields) { + if(cvocManagedFieldMap.get(dsfType.getId()).contains(df.getDatasetFieldType().getName())) { + String solrManagedFieldSearchable = df.getDatasetFieldType().getSolrField().getNameSearchable(); + // Try to get string values from externalvocabularyvalue but for a managed fields of the CVOCConf + Set stringsForManagedField = datasetFieldService.getIndexableStringsByTermUri(val, cvocMap.get(dsfType.getId()), df.getDatasetFieldType().getName()); + logger.fine(solrManagedFieldSearchable + " filled with externalvocabularyvalue : " + stringsForManagedField); + //.addField works as addition of value not a replace of value + // it allows to add mapped values by CVOCConf before or after indexing real DatasetField value(s) of solrManagedFieldSearchable + solrInputDocument.addField(solrManagedFieldSearchable, stringsForManagedField); + } + } + } } + logger.fine(solrFieldSearchable + " filled with externalvocabularyvalue : " + searchStrings); solrInputDocument.addField(solrFieldSearchable, searchStrings); if (dsfType.getSolrField().isFacetable()) { + logger.fine(solrFieldFacetable + " gets " + vals); solrInputDocument.addField(solrFieldFacetable, vals); } } + if (dsfType.isControlledVocabulary()) { - for (ControlledVocabularyValue controlledVocabularyValue : dsf.getControlledVocabularyValues()) { - if (controlledVocabularyValue.getStrValue().equals(DatasetField.NA_VALUE)) { - continue; - } + /** If the cvv list is empty but the dfv list is not then it is assumed this was harvested + * from an installation that had controlled vocabulary entries that don't exist in our this 
db + * @see Feature Request/Idea: Harvest metadata values that aren't from a list of controlled values #9992 + */ + if (dsf.getControlledVocabularyValues().isEmpty()) { + for (DatasetFieldValue dfv : dsf.getDatasetFieldValues()) { + if (dfv.getValue() == null || dfv.getValue().equals(DatasetField.NA_VALUE)) { + continue; + } + solrInputDocument.addField(solrFieldSearchable, dfv.getValue()); - // Index in all used languages (display and metadata languages - if (!dsfType.isAllowMultiples() || langs.isEmpty()) { - solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getStrValue()); - } else { - for(String locale: langs) { - solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getLocaleStrValue(locale)); + if (dsfType.getSolrField().isFacetable()) { + solrInputDocument.addField(solrFieldFacetable, dfv.getValue()); } } + } else { + for (ControlledVocabularyValue controlledVocabularyValue : dsf.getControlledVocabularyValues()) { + if (controlledVocabularyValue.getStrValue().equals(DatasetField.NA_VALUE)) { + continue; + } - if (dsfType.getSolrField().isFacetable()) { - solrInputDocument.addField(solrFieldFacetable, controlledVocabularyValue.getStrValue()); + // Index in all used languages (display and metadata languages + if (!dsfType.isAllowMultiples() || langs.isEmpty()) { + solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getStrValue()); + } else { + for(String locale: langs) { + solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getLocaleStrValue(locale)); + } + } + + if (dsfType.getSolrField().isFacetable()) { + solrInputDocument.addField(solrFieldFacetable, controlledVocabularyValue.getStrValue()); + } } } } else if (dsfType.getFieldType().equals(DatasetFieldType.FieldType.TEXTBOX)) { @@ -1072,13 +1253,17 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set accessObject = null; InputStream instream = null; ContentHandler textHandler = null; @@ -1259,6 +1455,7 @@ public SolrInputDocuments toSolrDocs(IndexableDataset indexableDataset, Set d try { solrClientService.getSolrClient().add(docs.getDocuments()); - solrClientService.getSolrClient().commit(); } catch (SolrServerException | IOException ex) { if (ex.getCause() instanceof SolrServerException) { throw new SolrServerException(ex); @@ -1492,6 +1701,7 @@ private void updateLastIndexedTimeInNewTransaction(Long id) { DvObject dvObjectToModify = em.find(DvObject.class, id); dvObjectToModify.setIndexTime(new Timestamp(new Date().getTime())); dvObjectToModify = em.merge(dvObjectToModify); + em.flush(); } /** @@ -1628,6 +1838,16 @@ private List getDataversePathsFromSegments(List dataversePathSeg return subtrees; } + private void addLicenseToSolrDoc(SolrInputDocument solrInputDocument, DatasetVersion datasetVersion) { + if (datasetVersion != null && datasetVersion.getTermsOfUseAndAccess() != null) { + String licenseName = "Custom Terms"; + if(datasetVersion.getTermsOfUseAndAccess().getLicense() != null) { + licenseName = datasetVersion.getTermsOfUseAndAccess().getLicense().getName(); + } + solrInputDocument.addField(SearchFields.DATASET_LICENSE, licenseName); + } + } + private void addDataverseReleaseDateToSolrDoc(SolrInputDocument solrInputDocument, Dataverse dataverse) { if (dataverse.getPublicationDate() != null) { Calendar calendar = Calendar.getInstance(); @@ -1705,7 +1925,6 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc sid.removeField(SearchFields.SUBTREE); 
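 // Re-add the doc with its updated subtree paths below; the explicit commit() // formerly issued after each add() is removed by this PR, so visibility now // relies on the Solr server's commit configuration (assumption: autoCommit or // similar is in place).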
sid.addField(SearchFields.SUBTREE, paths); UpdateResponse addResponse = solrClientService.getSolrClient().add(sid); - UpdateResponse commitResponse = solrClientService.getSolrClient().commit(); if (object.isInstanceofDataset()) { for (DataFile df : dataset.getFiles()) { solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString())); @@ -1719,7 +1938,6 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc sid.removeField(SearchFields.SUBTREE); sid.addField(SearchFields.SUBTREE, paths); addResponse = solrClientService.getSolrClient().add(sid); - commitResponse = solrClientService.getSolrClient().commit(); } } } @@ -1765,11 +1983,6 @@ public String delete(Dataverse doomed) { } catch (SolrServerException | IOException ex) { return ex.toString(); } - try { - solrClientService.getSolrClient().commit(); - } catch (SolrServerException | IOException ex) { - return ex.toString(); - } String response = "Successfully deleted dataverse " + doomed.getId() + " from Solr index. updateReponse was: " + updateResponse.toString(); logger.fine(response); return response; @@ -1790,11 +2003,6 @@ public String removeSolrDocFromIndex(String doomed) { } catch (SolrServerException | IOException ex) { return ex.toString(); } - try { - solrClientService.getSolrClient().commit(); - } catch (SolrServerException | IOException ex) { - return ex.toString(); - } String response = "Attempted to delete " + doomed + " from Solr index. updateReponse was: " + updateResponse.toString(); logger.fine(response); return response; @@ -1836,6 +2044,7 @@ private String determineDeaccessionedDatasetId(Dataset dataset) { return IndexableObject.IndexableTypes.DATASET.getName() + "_" + dataset.getId() + IndexableDataset.DatasetState.DEACCESSIONED.getSuffix(); } + //Only used when FeatureFlags.REDUCE_SOLR_DELETES is disabled private String removeDeaccessioned(Dataset dataset) { StringBuilder result = new StringBuilder(); String deleteDeaccessionedResult = removeSolrDocFromIndex(determineDeaccessionedDatasetId(dataset)); @@ -1846,6 +2055,7 @@ private String removeDeaccessioned(Dataset dataset) { return result.toString(); } + //Only used when FeatureFlags.REDUCE_SOLR_DELETES is disabled private String removePublished(Dataset dataset) { StringBuilder result = new StringBuilder(); String deletePublishedResult = removeSolrDocFromIndex(determinePublishedDatasetSolrDocId(dataset)); @@ -1855,6 +2065,14 @@ private String removePublished(Dataset dataset) { result.append(deleteFilesResult); return result.toString(); } + + // Only used when FeatureFlags.REDUCE_SOLR_DELETES is disabled + private String deleteDraftFiles(List solrDocIdsForDraftFilesToDelete) { + String deleteDraftFilesResults = ""; + IndexResponse indexResponse = solrIndexService.deleteMultipleSolrIds(solrDocIdsForDraftFilesToDelete); + deleteDraftFilesResults = indexResponse.toString(); + return deleteDraftFilesResults; + } private Dataverse findRootDataverseCached() { if (true) { @@ -1988,8 +2206,50 @@ public List findPermissionsInSolrOnly() throws SearchException { SolrDocumentList list = rsp.getResults(); for (SolrDocument doc: list) { long id = Long.parseLong((String) doc.getFieldValue(SearchFields.DEFINITION_POINT_DVOBJECT_ID)); + String docId = (String)doc.getFieldValue(SearchFields.ID); if(!dvObjectService.checkExists(id)) { - permissionInSolrOnly.add((String)doc.getFieldValue(SearchFields.ID)); + permissionInSolrOnly.add(docId); + } else { + DvObject obj = dvObjectService.findDvObject(id); + if (obj instanceof Dataset d) 
{ + DatasetVersion dv = d.getLatestVersion(); + if (docId.endsWith("draft_permission")) { + if (!dv.isDraft()) { + permissionInSolrOnly.add(docId); + } + } else if (docId.endsWith("deaccessioned_permission")) { + if (!dv.isDeaccessioned()) { + permissionInSolrOnly.add(docId); + } + } else { + if (d.getReleasedVersion() == null) { + permissionInSolrOnly.add(docId); + } + } + } else if (obj instanceof DataFile f) { + List states = dataFileService.findVersionStates(f.getId()); + Set strings = states.stream().map(VersionState::toString).collect(Collectors.toSet()); + logger.fine("States for " + docId + ": " + String.join(", ", strings)); + if (docId.endsWith("draft_permission")) { + if (!states.contains(VersionState.DRAFT)) { + permissionInSolrOnly.add(docId); + } + } else if (docId.endsWith("deaccessioned_permission")) { + if (!states.contains(VersionState.DEACCESSIONED) && states.size() == 1) { + permissionInSolrOnly.add(docId); + } + } else { + if (!states.contains(VersionState.RELEASED)) { + permissionInSolrOnly.add(docId); + } else { + if(dataFileService.findFileMetadataByDatasetVersionIdAndDataFileId(f.getOwner().getReleasedVersion().getId(), f.getId()) == null) { + logger.fine("Adding doc " + docId + " to list of permissions in Solr only"); + permissionInSolrOnly.add(docId); + } + } + + } + } } } if (cursorMark.equals(nextCursorMark)) { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchConstants.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchConstants.java index 73b39332013..2d6632760fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchConstants.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchConstants.java @@ -40,5 +40,5 @@ public class SearchConstants { public static final String RESTRICTED = "Restricted"; public static final String EMBARGOEDTHENRESTRICTED = "EmbargoedThenRestricted"; public static final String EMBARGOEDTHENPUBLIC = "EmbargoedThenPublic"; - + public static final String RETENTIONEXPIRED = "RetentionPeriodExpired"; } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java index 8fb7c161517..02649cec68c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java @@ -217,6 +217,15 @@ public class SearchFields { public static final String DEFINITION_POINT_DVOBJECT_ID = "definitionPointDvObjectId"; public static final String DISCOVERABLE_BY = "discoverableBy"; + /** + * publicObject_b is an experimental field tied to the + * avoid-expensive-solr-join feature flag. Rather than discoverableBy which + * is a field on permission documents, publicObject_b is a field on content + * documents (dvObjects). By indexing publicObject_b=true, we can let guests + * search on it, avoiding an expensive join for those (common) users. + */ + public static final String PUBLIC_OBJECT = "publicObject_b"; + /** * i.e. "Unpublished", "Draft" (multivalued) */ @@ -267,7 +276,7 @@ more targeted results for just datasets. The format is YYYY (i.e. public static final String FULL_TEXT = "_text_"; public static final String EMBARGO_END_DATE = "embargoEndDate"; - + public static final String RETENTION_END_DATE = "retentionEndDate"; // SpatialRecursivePrefixTreeFieldType: https://solr.apache.org/guide/8_11/spatial-search.html#rpt public static final String GEOLOCATION = "geolocation"; @@ -276,4 +285,6 @@ more targeted results for just datasets. The format is YYYY (i.e. 
public static final String DATASET_VALID = "datasetValid"; + public static final String DATASET_LICENSE = "license"; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 2ce06541afa..4f3f6e46e48 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -21,7 +21,9 @@ import edu.harvard.iq.dataverse.SettingsWrapper; import edu.harvard.iq.dataverse.ThumbnailServiceWrapper; import edu.harvard.iq.dataverse.WidgetWrapper; +import edu.harvard.iq.dataverse.authorization.users.GuestUser; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import java.time.LocalDate; import java.util.ArrayList; @@ -120,7 +122,6 @@ public class SearchIncludeFragment implements java.io.Serializable { private Long facetCountDatasets = 0L; private Long facetCountFiles = 0L; Map previewCountbyType = new HashMap<>(); - private SolrQueryResponse solrQueryResponseAllTypes; private String sortField; private SortOrder sortOrder; private String currentSort; @@ -132,6 +133,7 @@ public class SearchIncludeFragment implements java.io.Serializable { Map datasetfieldFriendlyNamesBySolrField = new HashMap<>(); Map staticSolrFieldFriendlyNamesBySolrField = new HashMap<>(); private boolean solrIsDown = false; + private boolean solrIsTemporarilyUnavailable = false; private Map numberOfFacets = new HashMap<>(); // private boolean showUnpublished; List filterQueriesDebug = new ArrayList<>(); @@ -279,8 +281,9 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused SolrQueryResponse solrQueryResponse = null; + SolrQueryResponse solrQueryResponseSecondPass = null; - List filterQueriesFinal = new ArrayList<>(); + List filterQueriesExtended = new ArrayList<>(); if (dataverseAlias != null) { this.dataverse = dataverseService.findByAlias(dataverseAlias); @@ -294,7 +297,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused * @todo centralize this into SearchServiceBean */ if (!isfilterQueryAlreadyInMap(filterDownToSubtree)){ - filterQueriesFinal.add(filterDownToSubtree); + filterQueriesExtended.add(filterDownToSubtree); } // this.dataverseSubtreeContext = dataversePath; } else { @@ -307,22 +310,23 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused this.setRootDv(true); } + filterQueriesExtended.addAll(filterQueries); + + /** + * Add type queries, for the types (Dataverses, Datasets, Datafiles) + * currently selected: + */ selectedTypesList = new ArrayList<>(); String[] parts = selectedTypesString.split(":"); selectedTypesList.addAll(Arrays.asList(parts)); - - List filterQueriesFinalAllTypes = new ArrayList<>(); + String[] arr = selectedTypesList.toArray(new String[selectedTypesList.size()]); selectedTypesHumanReadable = combine(arr, " OR "); if (!selectedTypesHumanReadable.isEmpty()) { typeFilterQuery = SearchFields.TYPE + ":(" + selectedTypesHumanReadable + ")"; - } - - filterQueriesFinal.addAll(filterQueries); - filterQueriesFinalAllTypes.addAll(filterQueriesFinal); - - String allTypesFilterQuery = SearchFields.TYPE + ":(dataverses OR datasets OR files)"; - filterQueriesFinalAllTypes.add(allTypesFilterQuery); + } + List filterQueriesFinal = new ArrayList<>(); + 
filterQueriesFinal.addAll(filterQueriesExtended); filterQueriesFinal.add(typeFilterQuery); if (page <= 1) { @@ -344,6 +348,7 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused try { logger.fine("ATTENTION! query from user: " + query); logger.fine("ATTENTION! queryToPassToSolr: " + queryToPassToSolr); + logger.fine("ATTENTION! filterQueriesFinal: " + filterQueriesFinal.toString()); logger.fine("ATTENTION! sort by: " + sortField); /** @@ -351,22 +356,82 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused * https://github.com/IQSS/dataverse/issues/84 */ int numRows = 10; - HttpServletRequest httpServletRequest = (HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest(); - DataverseRequest dataverseRequest = new DataverseRequest(session.getUser(), httpServletRequest); + DataverseRequest dataverseRequest = getDataverseRequest(); List<Dataverse> dataverses = new ArrayList<>(); dataverses.add(dataverse); - solrQueryResponse = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinal, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); + solrQueryResponse = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinal, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null, !isFacetsDisabled(), true); if (solrQueryResponse.hasError()){ logger.info(solrQueryResponse.getError()); setSolrErrorEncountered(true); + } + // Solr "temporarily unavailable" is the condition triggered by + // receiving a 503 from the search engine, that is in turn a result + // of one of the Solr "circuit breakers" being triggered by excessive + // load. We treat this condition as distinct from "Solr is down", + // on the assumption that it is transient. + if (solrQueryResponse.isSolrTemporarilyUnavailable()) { + setSolrTemporarilyUnavailable(true); } // This 2nd search() is for populating the "type" ("dataverse", "dataset", "file") facets: -- L.A. // (why exactly do we need it, again?) // To get the counts we display in the types facets particulary for unselected types - SEK 08/25/2021 - solrQueryResponseAllTypes = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalAllTypes, sortField, sortOrder.toString(), paginationStart, onlyDataRelatedToMe, numRows, false, null, null); - if (solrQueryResponse.hasError()){ - logger.info(solrQueryResponse.getError()); - setSolrErrorEncountered(true); + // Sure, but we should not waste resources here. We will try to save + // solr some extra work and a) only run this second query IF there are + // one or more unselected type facets; and b) drop all the extra + // parameters from this second query - such as facets and highlights - + // that we do not actually need for the purposes of finding these + // extra numbers. -- L.A.
10/16/2023 + + // populate preview counts: https://redmine.hmdc.harvard.edu/issues/3560 + previewCountbyType.put(BundleUtil.getStringFromBundle("dataverses"), 0L); + previewCountbyType.put(BundleUtil.getStringFromBundle("datasets"), 0L); + previewCountbyType.put(BundleUtil.getStringFromBundle("files"), 0L); + + + // This will populate the type facet counts for the types that are + // currently selected on the collection page: + for (FacetCategory facetCategory : solrQueryResponse.getTypeFacetCategories()) { + for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); + } + } + + if (!wasSolrErrorEncountered() && selectedTypesList.size() < 3 && !isSolrTemporarilyUnavailable() && !isFacetsDisabled() && !isUncheckedTypesFacetDisabled()) { + // If some types are NOT currently selected, we will need to + // run a second search to obtain the numbers of the unselected types: + + List filterQueriesFinalSecondPass = new ArrayList<>(); + filterQueriesFinalSecondPass.addAll(filterQueriesExtended); + + arr = new String[3]; + int c = 0; + for (String dvObjectType : Arrays.asList("dataverses", "datasets", "files")) { + if (!selectedTypesList.contains(dvObjectType)) { + arr[c++] = dvObjectType; + } + } + filterQueriesFinalSecondPass.add(SearchFields.TYPE + ":(" + combine(arr, " OR ", c) + ")"); + logger.fine("second pass query: " + queryToPassToSolr); + logger.fine("second pass filter query: "+filterQueriesFinalSecondPass.toString()); + + solrQueryResponseSecondPass = searchService.search(dataverseRequest, dataverses, queryToPassToSolr, filterQueriesFinalSecondPass, null, sortOrder.toString(), 0, onlyDataRelatedToMe, 1, false, null, null, false, false); + + if (solrQueryResponseSecondPass != null) { + + if (solrQueryResponseSecondPass.hasError()) { + logger.fine(solrQueryResponseSecondPass.getError()); + setSolrErrorEncountered(true); + } + + // And now populate the remaining type facets: + for (FacetCategory facetCategory : solrQueryResponseSecondPass.getTypeFacetCategories()) { + for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { + previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); + } + } + } else { + logger.warning("null solr response from the second pass type query"); + } } } catch (SearchException ex) { @@ -446,17 +511,6 @@ The real issue here (https://github.com/IQSS/dataverse/issues/7304) is caused } } - // populate preview counts: https://redmine.hmdc.harvard.edu/issues/3560 - previewCountbyType.put(BundleUtil.getStringFromBundle("dataverses"), 0L); - previewCountbyType.put(BundleUtil.getStringFromBundle("datasets"), 0L); - previewCountbyType.put(BundleUtil.getStringFromBundle("files"), 0L); - if (solrQueryResponseAllTypes != null) { - for (FacetCategory facetCategory : solrQueryResponseAllTypes.getTypeFacetCategories()) { - for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { - previewCountbyType.put(facetLabel.getName(), facetLabel.getCount()); - } - } - } setDisplayCardValues(); @@ -606,6 +660,10 @@ public void incrementFacets(String name, int incrementNum) { // http://stackoverflow.com/questions/1515437/java-function-for-arrays-like-phps-join/1515548#1515548 String combine(String[] s, String glue) { int k = s.length; + return combine(s, glue, k); + } + + String combine(String[] s, String glue, int k) { if (k == 0) { return null; } @@ -1020,7 +1078,68 @@ public boolean isSolrIsDown() { public void setSolrIsDown(boolean solrIsDown) { this.solrIsDown = solrIsDown; } + + 
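The second-pass logic above can be made concrete with a small worked example (hypothetical selection; dvObjectType is the value of SearchFields.TYPE as used in this codebase). If only "datasets" is checked, the first query runs with fq=dvObjectType:(datasets) plus full facets and highlights; the second pass keeps the same query and base filters but flips the type filter to the complement and strips everything not needed for counting, roughly:

    q    = <the user's query>
    fq   = <the user's other filters>
    fq   = dvObjectType:(dataverses OR files)
    rows = 1
    sort = <none>
    facet=true, facet.field=dvObjectType   (the always-on TYPE facet)

Its dvObjectType facet counts then fill in the preview numbers for the unchecked "Dataverses" and "Files" checkboxes, with no entities retrieved, no highlighting, and no other facets requested.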
public boolean isSolrTemporarilyUnavailable() { + return solrIsTemporarilyUnavailable; + } + + public void setSolrTemporarilyUnavailable(boolean solrIsTemporarilyUnavailable) { + this.solrIsTemporarilyUnavailable = solrIsTemporarilyUnavailable; + } + Boolean solrFacetsDisabled = null; + /** + * Indicates that the fragment should not be requesting facets in Solr + * searches and rendering them on the page. + * @return true if disabled; false by default + */ + public boolean isFacetsDisabled() { + if (this.solrFacetsDisabled != null) { + return this.solrFacetsDisabled; + } + + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacets, false)) { + return this.solrFacetsDisabled = true; + } + + // We also have mechanisms for disabling the facets selectively, just for + // the guests, or anonymous users: + if (session.getUser() instanceof GuestUser) { + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacetsForGuestUsers, false)) { + return this.solrFacetsDisabled = true; + } + + // An even lower grade of user than Guest is a truly anonymous user - + // a guest user who came without the session cookie: + Map cookies = FacesContext.getCurrentInstance().getExternalContext().getRequestCookieMap(); + if (!(cookies != null && cookies.containsKey("JSESSIONID"))) { + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableSolrFacetsWithoutJsession, false)) { + return this.solrFacetsDisabled = true; + } + } + } + + return this.solrFacetsDisabled = false; + } + + Boolean disableSecondPassSearch = null; + + /** + * Indicates that we do not need to run the second search query to populate + * the counts for *unchecked* type facets. + * @return true if disabled; false by default + */ + public boolean isUncheckedTypesFacetDisabled() { + if (this.disableSecondPassSearch != null) { + return this.disableSecondPassSearch; + } + if (settingsWrapper.isTrueForKey(SettingsServiceBean.Key.DisableUncheckedTypesFacet, false)) { + return this.disableSecondPassSearch = true; + } + return this.disableSecondPassSearch = false; + } + + public boolean isRootDv() { return rootDv; } @@ -1163,6 +1282,12 @@ public List getFriendlyNamesFromFilterQuery(String filterQuery) { friendlyNames.add(friendlyName.get()); return friendlyNames; } + } else if (key.equals(SearchFields.DATASET_LICENSE)) { + try { + friendlyNames.add(BundleUtil.getStringFromPropertyFile("license." 
+ valueWithoutQuotes.toLowerCase().replace(" ","_") + ".name", "License")); + } catch (Exception e) { + logger.fine(String.format("action=getFriendlyNamesFromFilterQuery cannot find friendlyName for key=%s value=%s", key, value)); + } } friendlyNames.add(valueWithoutQuotes); @@ -1300,7 +1425,7 @@ public void setDisplayCardValues() { result.setImageUrl(thumbnailServiceWrapper.getDataverseCardImageAsBase64Url(result)); } else if (result.getType().equals("datasets")) { if (result.getEntity() != null) { - result.setImageUrl(thumbnailServiceWrapper.getDatasetCardImageAsBase64Url(result)); + result.setImageUrl(thumbnailServiceWrapper.getDatasetCardImageAsUrl(result)); } if (result.isHarvested()) { @@ -1394,9 +1519,31 @@ public boolean isActivelyEmbargoed(SolrSearchResult result) { return false; } } + + public boolean isRetentionExpired(SolrSearchResult result) { + Long retentionEndDate = result.getRetentionEndDate(); + if (retentionEndDate != null) { + return LocalDate.now().toEpochDay() > retentionEndDate; + } else { + return false; + } + } + private DataverseRequest getDataverseRequest() { + final HttpServletRequest httpServletRequest = (HttpServletRequest) FacesContext.getCurrentInstance().getExternalContext().getRequest(); + return new DataverseRequest(session.getUser(), httpServletRequest); + } + public boolean isValid(SolrSearchResult result) { - return result.isValid(); + return result.isValid(x -> { + Long id = x.getEntityId(); + DvObject obj = dvObjectService.findDvObject(id); + if (obj instanceof Dataset) { + return permissionsWrapper.canUpdateDataset(getDataverseRequest(), (Dataset) obj); + } + logger.fine("isValid called for dvObject that is null (or not a dataset), id: " + id + ". This can occur if a dataset is deleted while a search is in progress"); + return true; + }); } public enum SortOrder { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 44976d232c2..1d25dbcdaba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -16,6 +16,7 @@ import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import java.io.IOException; @@ -101,7 +102,7 @@ public class SearchServiceBean { public SolrQueryResponse search(DataverseRequest dataverseRequest, List<Dataverse> dataverses, String query, List<String> filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage) throws SearchException { return search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, true, null, null); } - + /** * Import note: "onlyDatatRelatedToMe" relies on filterQueries for providing * access to Private Data for the correct user @@ -122,6 +123,41 @@ public SolrQueryResponse search(DataverseRequest dataverseRequest, List<Dataverse> dataverses, + String query, + List<String> filterQueries, + String sortField, + String sortOrder, + int paginationStart, + boolean onlyDatatRelatedToMe, + int numResultsPerPage, + boolean retrieveEntities, + String geoPoint, + String geoRadius) throws SearchException { + return
search(dataverseRequest, dataverses, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, retrieveEntities, geoPoint, geoRadius, true, true); + } + + /** + * @param dataverseRequest + * @param dataverses + * @param query + * @param filterQueries + * @param sortField + * @param sortOrder + * @param paginationStart + * @param onlyDatatRelatedToMe + * @param numResultsPerPage + * @param retrieveEntities - look up dvobject entities with .find() (potentially expensive!) + * @param geoPoint e.g. "35,15" + * @param geoRadius e.g. "5" + * @param addFacets whether to request facet fields in the Solr query + * @param addHighlights whether to request highlighting * @return * @throws SearchException */ @@ -130,13 +166,16 @@ public SolrQueryResponse search( List<Dataverse> dataverses, String query, List<String> filterQueries, - String sortField, String sortOrder, + String sortField, + String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage, boolean retrieveEntities, String geoPoint, - String geoRadius + String geoRadius, + boolean addFacets, + boolean addHighlights ) throws SearchException { if (paginationStart < 0) { @@ -149,71 +188,22 @@ public SolrQueryResponse search( SolrQuery solrQuery = new SolrQuery(); query = SearchUtil.sanitizeQuery(query); solrQuery.setQuery(query); -// SortClause foo = new SortClause("name", SolrQuery.ORDER.desc); -// if (query.equals("*") || query.equals("*:*")) { -// solrQuery.setSort(new SortClause(SearchFields.NAME_SORT, SolrQuery.ORDER.asc)); - solrQuery.setSort(new SortClause(sortField, sortOrder)); -// } else { -// solrQuery.setSort(sortClause); -// } -// solrQuery.setSort(sortClause); - solrQuery.setHighlight(true).setHighlightSnippets(1); - Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); - if (fragSize != null) { - solrQuery.setHighlightFragsize(fragSize); - } - solrQuery.setHighlightSimplePre(""); - solrQuery.setHighlightSimplePost(""); - Map<String, String> solrFieldsToHightlightOnMap = new HashMap<>(); - // TODO: Do not hard code "Name" etc as English here.
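The removals beginning above (and continuing below) relocate the sort and highlight setup: sorting becomes optional, applied only when sortField is non-null, and the highlight configuration moves behind the new addHighlights switch further down. A minimal caller-side sketch of the new 14-argument overload for a counting-only query, mirroring the second-pass call in SearchIncludeFragment (argument values are illustrative, not part of the PR):

    SolrQueryResponse counts = searchService.search(
            dataverseRequest, dataverses, queryToPassToSolr, filterQueries,
            null,          // sortField: no sort needed when we only want counts
            SortOrder.asc.toString(),
            0,             // paginationStart
            false,         // onlyDatatRelatedToMe
            1,             // numResultsPerPage: one row is enough
            false,         // retrieveEntities: skip the expensive .find() lookups
            null, null,    // geoPoint, geoRadius
            false, false); // addFacets, addHighlights

Only the always-requested TYPE facet comes back, which is all the type-count use case needs.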
- solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); - solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); - solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); - solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); - solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); - - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); - solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); - /** - * @todo Dataverse subject and affiliation should be highlighted but - * this is commented out right now because the "friendly" names are not - * being shown on the dataverse cards. See also - * https://github.com/IQSS/dataverse/issues/1431 - */ -// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); -// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); - /** - * @todo: show highlight on file card? - * https://redmine.hmdc.harvard.edu/issues/3848 - */ - solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); - solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); - List datasetFields = datasetFieldService.findAllOrderedById(); - for (DatasetFieldType datasetFieldType : datasetFields) { - String solrField = datasetFieldType.getSolrField().getNameSearchable(); - String displayName = datasetFieldType.getDisplayName(); - solrFieldsToHightlightOnMap.put(solrField, displayName); - } - for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { - String solrField = entry.getKey(); - // String displayName = entry.getValue(); - solrQuery.addHighlightField(solrField); + if (sortField != null) { + // is it ok not to specify any sort? - there are cases where we + // don't care, and it must cost some extra cycles -- L.A. + solrQuery.setSort(new SortClause(sortField, sortOrder)); } + solrQuery.setParam("fl", "*,score"); solrQuery.setParam("qt", "/select"); solrQuery.setParam("facet", "true"); + /** * @todo: do we need facet.query? 
*/ solrQuery.setParam("facet.query", "*"); + solrQuery.addFacetField(SearchFields.TYPE); // this one is always performed + for (String filterQuery : filterQueries) { solrQuery.addFilterQuery(filterQuery); } @@ -223,70 +213,139 @@ public SolrQueryResponse search( // See https://solr.apache.org/guide/8_11/spatial-search.html#bbox solrQuery.addFilterQuery("{!bbox sfield=" + SearchFields.GEOLOCATION + "}"); } + + List<DataverseMetadataBlockFacet> metadataBlockFacets = new LinkedList<>(); - // ----------------------------------- - // Facets to Retrieve - // ----------------------------------- - solrQuery.addFacetField(SearchFields.METADATA_TYPES); -// solrQuery.addFacetField(SearchFields.HOST_DATAVERSE); -// solrQuery.addFacetField(SearchFields.AUTHOR_STRING); - solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY); - solrQuery.addFacetField(SearchFields.METADATA_SOURCE); -// solrQuery.addFacetField(SearchFields.AFFILIATION); - solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR); -// solrQuery.addFacetField(SearchFields.CATEGORY); -// solrQuery.addFacetField(SearchFields.FILE_TYPE_MIME); -// solrQuery.addFacetField(SearchFields.DISTRIBUTOR); -// solrQuery.addFacetField(SearchFields.KEYWORD); - /** - * @todo when a new method on datasetFieldService is available - * (retrieveFacetsByDataverse?) only show the facets that the dataverse - * in question wants to show (and in the right order): - * https://redmine.hmdc.harvard.edu/issues/3490 - * - * also, findAll only returns advancedSearchField = true... we should - * probably introduce the "isFacetable" boolean rather than caring about - * if advancedSearchField is true or false - * - */ + if (addFacets) { + + // ----------------------------------- + // Facets to Retrieve + // ----------------------------------- + solrQuery.addFacetField(SearchFields.METADATA_TYPES); + solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY); + solrQuery.addFacetField(SearchFields.METADATA_SOURCE); + solrQuery.addFacetField(SearchFields.PUBLICATION_YEAR); + /* + * As discussed in Slack on 2021-09-14: users can see draft/unpublished + * objects if the owner grants permissions to all users, so it makes sense + * to expose this facet to all users. This change was requested because + * PR #9635 unintentionally changed the order of the facets. + */ + solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS); + solrQuery.addFacetField(SearchFields.DATASET_LICENSE); + /** + * @todo when a new method on datasetFieldService is available + * (retrieveFacetsByDataverse?) only show the facets that the + * dataverse in question wants to show (and in the right order): + * https://redmine.hmdc.harvard.edu/issues/3490 + * + * also, findAll only returns advancedSearchField = true... we + * should probably introduce the "isFacetable" boolean rather than + * caring about if advancedSearchField is true or false + * + */ + + if (dataverses != null) { + for (Dataverse dataverse : dataverses) { + if (dataverse != null) { + for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) { + DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType(); + solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable()); + } + + // Get all metadata block facets configured to be displayed + metadataBlockFacets.addAll(dataverse.getMetadataBlockFacets()); + } + } + } + + solrQuery.addFacetField(SearchFields.FILE_TYPE); + /** + * @todo: hide the extra line this shows in the GUI... at least it's + * last...
+ */ + solrQuery.addFacetField(SearchFields.FILE_TAG); + if (!systemConfig.isPublicInstall()) { + solrQuery.addFacetField(SearchFields.ACCESS); + } + } + + List datasetFields = datasetFieldService.findAllOrderedById(); + Map solrFieldsToHightlightOnMap = new HashMap<>(); + if (addHighlights) { + solrQuery.setHighlight(true).setHighlightSnippets(1); + Integer fragSize = systemConfig.getSearchHighlightFragmentSize(); + if (fragSize != null) { + solrQuery.setHighlightFragsize(fragSize); + } + solrQuery.setHighlightSimplePre(""); + solrQuery.setHighlightSimplePost(""); + + // TODO: Do not hard code "Name" etc as English here. + solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); + solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); + solrFieldsToHightlightOnMap.put(SearchFields.LITERAL_QUESTION, BundleUtil.getStringFromBundle("search.datasets.literalquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.INTERVIEW_INSTRUCTIONS, BundleUtil.getStringFromBundle("search.datasets.interviewinstructions")); + solrFieldsToHightlightOnMap.put(SearchFields.POST_QUESTION, BundleUtil.getStringFromBundle("search.datasets.postquestion")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_UNIVERSE, BundleUtil.getStringFromBundle("search.datasets.variableuniverse")); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NOTES, BundleUtil.getStringFromBundle("search.datasets.variableNotes")); + + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Year"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.datasets.persistentId")); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_PERSISTENT_ID, BundleUtil.getStringFromBundle("advanced.search.files.persistentId")); + /** + * @todo Dataverse subject and affiliation should be highlighted but + * this is commented out right now because the "friendly" names are + * not being shown on the dataverse cards. See also + * https://github.com/IQSS/dataverse/issues/1431 + */ +// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); +// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); + /** + * @todo: show highlight on file card? + * https://redmine.hmdc.harvard.edu/issues/3848 + */ + solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag"); + + for (DatasetFieldType datasetFieldType : datasetFields) { + String solrField = datasetFieldType.getSolrField().getNameSearchable(); + String displayName = datasetFieldType.getDisplayName(); + solrFieldsToHightlightOnMap.put(solrField, displayName); + } + for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { + String solrField = entry.getKey(); + // String displayName = entry.getValue(); + solrQuery.addHighlightField(solrField); + } + } - List metadataBlockFacets = new LinkedList<>(); //I'm not sure if just adding null here is good for hte permissions system... 
i think it needs something if(dataverses != null) { for(Dataverse dataverse : dataverses) { // ----------------------------------- // PERMISSION FILTER QUERY // ----------------------------------- - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe); + String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe, addFacets); if (permissionFilterQuery != null) { solrQuery.addFilterQuery(permissionFilterQuery); } - if (dataverse != null) { - for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) { - DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType(); - solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable()); - } - // Get all metadata block facets configured to be displayed - metadataBlockFacets.addAll(dataverse.getMetadataBlockFacets()); - } } } else { - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe); + String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe, addFacets); if (permissionFilterQuery != null) { solrQuery.addFilterQuery(permissionFilterQuery); } } - solrQuery.addFacetField(SearchFields.FILE_TYPE); - /** - * @todo: hide the extra line this shows in the GUI... at least it's - * last... - */ - solrQuery.addFacetField(SearchFields.TYPE); - solrQuery.addFacetField(SearchFields.FILE_TAG); - if (!systemConfig.isPublicInstall()) { - solrQuery.addFacetField(SearchFields.ACCESS); - } + /** * @todo: do sanity checking... throw error if negative */ @@ -328,10 +387,32 @@ public SolrQueryResponse search( // Make the solr query // ----------------------------------- QueryResponse queryResponse = null; + try { queryResponse = solrClientService.getSolrClient().query(solrQuery); + } catch (RemoteSolrException ex) { String messageFromSolr = ex.getLocalizedMessage(); + + logger.fine("message from the solr exception: "+messageFromSolr+"; code: "+ex.code()); + + SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse(solrQuery); + + // We probably shouldn't be assuming that this is necessarily a + // "search syntax error", as the code below implies - could be + // something else too - ? 
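Background for the catch block above, and for the 503 branch the comment below introduces: Solr's circuit breakers are configured server-side (in solrconfig.xml, e.g. memory- or CPU-based breakers) and reject queries with HTTP 503 while the node is overloaded; SolrJ surfaces that as a RemoteSolrException whose code() is 503. A minimal, self-contained sketch of the client-side pattern (names here are illustrative, not part of the PR):

    import org.apache.solr.client.solrj.SolrClient;
    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.impl.BaseHttpSolrClient.RemoteSolrException;
    import org.apache.solr.client.solrj.response.QueryResponse;

    class BreakerAwareQuery {
        // Returns null on a 503 (circuit breaker tripped) so the caller can
        // report "temporarily unavailable" instead of "Solr is down".
        static QueryResponse queryOrNullOn503(SolrClient client, SolrQuery q) throws Exception {
            try {
                return client.query(q);
            } catch (RemoteSolrException ex) {
                if (ex.code() == 503) {
                    return null; // transient overload; safe to retry later
                }
                throw ex; // anything else is a genuine error
            }
        }
    }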
+ + // Specifically, we now rely on the Solr "circuit breaker" mechanism + // to start dropping requests with 503, when the service is + // overwhelmed with requests load (with the assumption that this is + // a transient condition): + + if (ex.code() == 503) { + // actual logic for communicating this state back to the local + // client code TBD (@todo) + exceptionSolrQueryResponse.setSolrTemporarilyUnavailable(true); + } + String error = "Search Syntax Error: "; String stringToHide = "org.apache.solr.search.SyntaxError: "; if (messageFromSolr.startsWith(stringToHide)) { @@ -341,10 +422,10 @@ public SolrQueryResponse search( error += messageFromSolr; } logger.info(error); - SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse(solrQuery); exceptionSolrQueryResponse.setError(error); // we can't show anything because of the search syntax error + long zeroNumResultsFound = 0; long zeroGetResultsStart = 0; List emptySolrSearchResults = new ArrayList<>(); @@ -360,6 +441,12 @@ public SolrQueryResponse search( } catch (SolrServerException | IOException ex) { throw new SearchException("Internal Dataverse Search Engine Error", ex); } + + int statusCode = queryResponse.getStatus(); + + logger.fine("status code of the query response: "+statusCode); + logger.fine("_size from query response: "+queryResponse._size()); + logger.fine("qtime: "+queryResponse.getQTime()); SolrDocumentList docs = queryResponse.getResults(); List solrSearchResults = new ArrayList<>(); @@ -413,37 +500,49 @@ public SolrQueryResponse search( String identifierOfDataverse = (String) solrDocument.getFieldValue(SearchFields.IDENTIFIER_OF_DATAVERSE); String nameOfDataverse = (String) solrDocument.getFieldValue(SearchFields.DATAVERSE_NAME); Long embargoEndDate = (Long) solrDocument.getFieldValue(SearchFields.EMBARGO_END_DATE); + Long retentionEndDate = (Long) solrDocument.getFieldValue(SearchFields.RETENTION_END_DATE); + // Boolean datasetValid = (Boolean) solrDocument.getFieldValue(SearchFields.DATASET_VALID); List matchedFields = new ArrayList<>(); - List highlights = new ArrayList<>(); - Map highlightsMap = new HashMap<>(); - Map> highlightsMap2 = new HashMap<>(); - Map highlightsMap3 = new HashMap<>(); - if (queryResponse.getHighlighting().get(id) != null) { - for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { - String field = entry.getKey(); - String displayName = entry.getValue(); - - List highlightSnippets = queryResponse.getHighlighting().get(id).get(field); - if (highlightSnippets != null) { - matchedFields.add(field); - /** - * @todo only SolrField.SolrType.STRING? that's not - * right... 
knit the SolrField object more into the - * highlighting stuff - */ - SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true); - Highlight highlight = new Highlight(solrField, highlightSnippets, displayName); - highlights.add(highlight); - highlightsMap.put(solrField, highlight); - highlightsMap2.put(solrField, highlightSnippets); - highlightsMap3.put(field, highlight); + + SolrSearchResult solrSearchResult = new SolrSearchResult(query, name); + + if (addHighlights) { + List highlights = new ArrayList<>(); + Map highlightsMap = new HashMap<>(); + Map> highlightsMap2 = new HashMap<>(); + Map highlightsMap3 = new HashMap<>(); + if (queryResponse.getHighlighting().get(id) != null) { + for (Map.Entry entry : solrFieldsToHightlightOnMap.entrySet()) { + String field = entry.getKey(); + String displayName = entry.getValue(); + + List highlightSnippets = queryResponse.getHighlighting().get(id).get(field); + if (highlightSnippets != null) { + matchedFields.add(field); + /** + * @todo only SolrField.SolrType.STRING? that's not + * right... knit the SolrField object more into the + * highlighting stuff + */ + SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true); + Highlight highlight = new Highlight(solrField, highlightSnippets, displayName); + highlights.add(highlight); + highlightsMap.put(solrField, highlight); + highlightsMap2.put(solrField, highlightSnippets); + highlightsMap3.put(field, highlight); + } } + } + solrSearchResult.setHighlightsAsList(highlights); + solrSearchResult.setHighlightsMap(highlightsMap); + solrSearchResult.setHighlightsAsMap(highlightsMap3); } - SolrSearchResult solrSearchResult = new SolrSearchResult(query, name); + + /** * @todo put all this in the constructor? */ @@ -470,9 +569,7 @@ public SolrQueryResponse search( solrSearchResult.setNameSort(nameSort); solrSearchResult.setReleaseOrCreateDate(release_or_create_date); solrSearchResult.setMatchedFields(matchedFields); - solrSearchResult.setHighlightsAsList(highlights); - solrSearchResult.setHighlightsMap(highlightsMap); - solrSearchResult.setHighlightsAsMap(highlightsMap3); + Map parent = new HashMap<>(); String description = (String) solrDocument.getFieldValue(SearchFields.DESCRIPTION); solrSearchResult.setDescriptionNoSnippet(description); @@ -480,13 +577,13 @@ public SolrQueryResponse search( solrSearchResult.setDvTree(dvTree); solrSearchResult.setDatasetValid(datasetValid); - String originSource = (String) solrDocument.getFieldValue(SearchFields.METADATA_SOURCE); - if (IndexServiceBean.HARVESTED.equals(originSource)) { + if (Boolean.TRUE.equals((Boolean) solrDocument.getFieldValue(SearchFields.IS_HARVESTED))) { solrSearchResult.setHarvested(true); } solrSearchResult.setEmbargoEndDate(embargoEndDate); - + solrSearchResult.setRetentionEndDate(retentionEndDate); + /** * @todo start using SearchConstants class here */ @@ -622,10 +719,12 @@ public SolrQueryResponse search( boolean unpublishedAvailable = false; boolean deaccessionedAvailable = false; boolean hideMetadataSourceFacet = true; + boolean hideLicenseFacet = true; for (FacetField facetField : queryResponse.getFacetFields()) { FacetCategory facetCategory = new FacetCategory(); List facetLabelList = new ArrayList<>(); int numMetadataSources = 0; + int numLicenses = 0; String metadataBlockName = ""; String datasetFieldName = ""; /** @@ -651,23 +750,29 @@ public SolrQueryResponse search( // logger.info("field: " + facetField.getName() + " " + facetFieldCount.getName() + " (" + facetFieldCount.getCount() + ")"); 
String localefriendlyName = null; if (facetFieldCount.getCount() > 0) { - if(metadataBlockName.length() > 0 ) { - localefriendlyName = getLocaleTitle(datasetFieldName,facetFieldCount.getName(), metadataBlockName); + if(metadataBlockName.length() > 0 ) { + localefriendlyName = getLocaleTitle(datasetFieldName,facetFieldCount.getName(), metadataBlockName); } else if (facetField.getName().equals(SearchFields.METADATA_TYPES)) { - Optional metadataBlockFacet = metadataBlockFacets.stream().filter(blockFacet -> blockFacet.getMetadataBlock().getName().equals(facetFieldCount.getName())).findFirst(); - if (metadataBlockFacet.isEmpty()) { + Optional metadataBlockFacet = metadataBlockFacets.stream().filter(blockFacet -> blockFacet.getMetadataBlock().getName().equals(facetFieldCount.getName())).findFirst(); + if (metadataBlockFacet.isEmpty()) { // metadata block facet is not configured to be displayed => ignore continue; - } + } - localefriendlyName = metadataBlockFacet.get().getMetadataBlock().getLocaleDisplayFacet(); - } else { - try { + localefriendlyName = metadataBlockFacet.get().getMetadataBlock().getLocaleDisplayFacet(); + } else if (facetField.getName().equals(SearchFields.DATASET_LICENSE)) { + try { + localefriendlyName = BundleUtil.getStringFromPropertyFile("license." + facetFieldCount.getName().toLowerCase().replace(" ","_") + ".name", "License"); + } catch (Exception e) { + localefriendlyName = facetFieldCount.getName(); + } + } else { + try { localefriendlyName = BundleUtil.getStringFromPropertyFile(facetFieldCount.getName(), "Bundle"); - } catch (Exception e) { + } catch (Exception e) { localefriendlyName = facetFieldCount.getName(); - } - } + } + } FacetLabel facetLabel = new FacetLabel(localefriendlyName, facetFieldCount.getCount()); // quote field facets facetLabel.setFilterQuery(facetField.getName() + ":\"" + facetFieldCount.getName() + "\""); @@ -680,15 +785,19 @@ public SolrQueryResponse search( } else if (facetFieldCount.getName().equals(IndexServiceBean.getDEACCESSIONED_STRING())) { deaccessionedAvailable = true; } - } - if (facetField.getName().equals(SearchFields.METADATA_SOURCE)) { + } else if (facetField.getName().equals(SearchFields.METADATA_SOURCE)) { numMetadataSources++; + } else if (facetField.getName().equals(SearchFields.DATASET_LICENSE)) { + numLicenses++; } } } if (numMetadataSources > 1) { hideMetadataSourceFacet = false; } + if (numLicenses > 1) { + hideLicenseFacet = false; + } facetCategory.setName(facetField.getName()); // hopefully people will never see the raw facetField.getName() because it may well have an _s at the end facetCategory.setFriendlyName(facetField.getName()); @@ -765,6 +874,10 @@ public SolrQueryResponse search( if (!hideMetadataSourceFacet) { facetCategoryList.add(facetCategory); } + } else if (facetCategory.getName().equals(SearchFields.DATASET_LICENSE)) { + if (!hideLicenseFacet) { + facetCategoryList.add(facetCategory); + } } else { facetCategoryList.add(facetCategory); } @@ -863,7 +976,7 @@ public String getCapitalizedName(String name) { * * @return */ - private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe) { + private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe, boolean addFacets) { User user = dataverseRequest.getUser(); if (user == null) { @@ -888,14 +1001,132 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ user = 
GuestUser.get(); } + AuthenticatedUser au = null; + Set groups; + + if (user instanceof GuestUser) { + // Yes, GuestUser may be part of one or more groups; such as IP Groups. + groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest)); + } else { + if (!(user instanceof AuthenticatedUser)) { + logger.severe("Should never reach here. A User must be an AuthenticatedUser or a Guest"); + throw new IllegalStateException("A User must be an AuthenticatedUser or a Guest"); + } + + au = (AuthenticatedUser) user; + + // ---------------------------------------------------- + // (3) Is this a Super User? + // If so, they can see everything + // ---------------------------------------------------- + if (au.isSuperuser()) { + // Somewhat dangerous because this user (a superuser) will be able + // to see everything in Solr with no regard to permissions. But it's + // been this way since Dataverse 4.0. So relax. :) + + return dangerZoneNoSolrJoin; + } + + // ---------------------------------------------------- + // (4) User is logged in AND onlyDatatRelatedToMe == true + // Yes, give back everything -> the settings will be in + // the filterqueries given to search + // ---------------------------------------------------- + if (onlyDatatRelatedToMe == true) { + if (systemConfig.myDataDoesNotUsePermissionDocs()) { + logger.fine("old 4.2 behavior: MyData is not using Solr permission docs"); + return dangerZoneNoSolrJoin; + } else { + // fall-through + logger.fine("new post-4.2 behavior: MyData is using Solr permission docs"); + } + } + + // ---------------------------------------------------- + // (5) Work with Authenticated User who is not a Superuser + // ---------------------------------------------------- + + groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest)); + } + + if (FeatureFlags.AVOID_EXPENSIVE_SOLR_JOIN.enabled()) { + /** + * Instead of doing a super expensive join, we will rely on the + * new boolean field PublicObject:true for public objects. This field + * is indexed on the content document itself, rather than a permission + * document. An additional join will be added only for any extra, + * more restricted groups that the user may be part of. + * **Note the experimental nature of this optimization**. + */ + StringBuilder sb = new StringBuilder(); + StringBuilder sbgroups = new StringBuilder(); + + // All users, guests and authenticated, should see all the + // documents marked as publicObject_b:true, at least: + sb.append(SearchFields.PUBLIC_OBJECT + ":" + true); + + // One or more groups *may* also be available for this user. Once again, + // do note that Guest users may be part of some groups, such as + // IP groups. 
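The lines that follow assemble that filter. A worked example of the result (hypothetical user and group; this assumes the usual values of SearchFields.DEFINITION_POINT and the IndexServiceBean group prefixes, i.e. definitionPointDocId, group_user and group_): for authenticated user id 42 who is also in an IP group with alias ipGroup3, the permission filter becomes

    fq = publicObject_b:true OR {!join from=definitionPointDocId to=id v=$q1}
    q1 = discoverableBy:(group_user42 OR group_ipGroup3)

so public content matches directly on the boolean field, and only the user's own grants and non-public groups still go through the join. For a guest who belongs to no groups, the filter collapses to publicObject_b:true and the join is skipped entirely.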
+ + int groupCounter = 0; + + // An AuthenticatedUser should also be able to see all the content + // on which they have direct permissions: + if (au != null) { + groupCounter++; + sbgroups.append(IndexServiceBean.getGroupPerUserPrefix() + au.getId()); + } + + // In addition to the user referenced directly, we will also + // add joins on all the non-public groups that may exist for the + // user: + for (Group group : groups) { + String groupAlias = group.getAlias(); + if (groupAlias != null && !groupAlias.isEmpty() && !groupAlias.startsWith("builtIn")) { + groupCounter++; + if (groupCounter > 1) { + sbgroups.append(" OR "); + } + sbgroups.append(IndexServiceBean.getGroupPrefix() + groupAlias); + } + } + + if (groupCounter > 1) { + // If there is more than one group, the parentheses must be added: + sbgroups.insert(0, "("); + sbgroups.append(")"); + } + + if (groupCounter > 0) { + // If there are any groups for this user, an extra join must be + // added to the query, and the extra sub-query must be added to + // the combined Solr query: + sb.append(" OR {!join from=" + SearchFields.DEFINITION_POINT + " to=id v=$q1}"); + // Add the subquery to the combined Solr query: + solrQuery.setParam("q1", SearchFields.DISCOVERABLE_BY + ":" + sbgroups.toString()); + logger.info("The sub-query q1 set to " + SearchFields.DISCOVERABLE_BY + ":" + sbgroups.toString()); + } + + String ret = sb.toString(); + logger.fine("Returning experimental query: " + ret); + return ret; + } + + // END OF EXPERIMENTAL OPTIMIZATION + + // Old, un-optimized way of handling permissions. + // Largely left intact, minus the lookups that have already been performed + // above. + // ---------------------------------------------------- // (1) Is this a GuestUser? - // Yes, see if GuestUser is part of any groups such as IP Groups. // ---------------------------------------------------- if (user instanceof GuestUser) { - String groupsFromProviders = ""; - Set groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest)); + StringBuilder sb = new StringBuilder(); + + String groupsFromProviders = ""; for (Group group : groups) { logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias()); String groupAlias = group.getAlias(); @@ -912,49 +1143,11 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ return guestWithGroups; } - // ---------------------------------------------------- - // (2) Retrieve Authenticated User - // ---------------------------------------------------- - if (!(user instanceof AuthenticatedUser)) { - logger.severe("Should never reach here. A User must be an AuthenticatedUser or a Guest"); - throw new IllegalStateException("A User must be an AuthenticatedUser or a Guest"); - } - - AuthenticatedUser au = (AuthenticatedUser) user; - - // Logged in user, has publication status facet - // - solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS); - - // ---------------------------------------------------- - // (3) Is this a Super User? - // Yes, give back everything - // ---------------------------------------------------- - if (au.isSuperuser()) { - // Somewhat dangerous because this user (a superuser) will be able - // to see everything in Solr with no regard to permissions. But it's - // been this way since Dataverse 4.0. So relax. 
:) - - return dangerZoneNoSolrJoin; - } - - // ---------------------------------------------------- - // (4) User is logged in AND onlyDatatRelatedToMe == true - // Yes, give back everything -> the settings will be in - // the filterqueries given to search - // ---------------------------------------------------- - if (onlyDatatRelatedToMe == true) { - if (systemConfig.myDataDoesNotUsePermissionDocs()) { - logger.fine("old 4.2 behavior: MyData is not using Solr permission docs"); - return dangerZoneNoSolrJoin; - } else { - logger.fine("new post-4.2 behavior: MyData is using Solr permission docs"); - } - } - // ---------------------------------------------------- // (5) Work with Authenticated User who is not a Superuser - // ---------------------------------------------------- + // ---------------------------------------------------- + // It was already confirmed, that if the user is not GuestUser, we + // have an AuthenticatedUser au which is not null. /** * @todo all this code needs cleanup and clarification. */ @@ -985,7 +1178,6 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ * a given "content document" (dataset version, etc) in Solr. */ String groupsFromProviders = ""; - Set groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest)); StringBuilder sb = new StringBuilder(); for (Group group : groups) { logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias()); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index 04021eb75b6..cfe29ea08c7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -357,7 +357,6 @@ private void persistToSolr(Collection docs) throws SolrServer * @todo Do something with these responses from Solr. 
*/ UpdateResponse addResponse = solrClientService.getSolrClient().add(docs); - UpdateResponse commitResponse = solrClientService.getSolrClient().commit(); } public IndexResponse indexPermissionsOnSelfAndChildren(long definitionPointId) { @@ -504,11 +503,6 @@ public IndexResponse deleteMultipleSolrIds(List solrIdsToDelete) { */ return new IndexResponse("problem deleting the following documents from Solr: " + solrIdsToDelete); } - try { - solrClientService.getSolrClient().commit(); - } catch (SolrServerException | IOException ex) { - return new IndexResponse("problem committing deletion of the following documents from Solr: " + solrIdsToDelete); - } return new IndexResponse("no known problem deleting the following documents from Solr:" + solrIdsToDelete); } @@ -516,7 +510,6 @@ public JsonObjectBuilder deleteAllFromSolrAndResetIndexTimes() throws SolrServer JsonObjectBuilder response = Json.createObjectBuilder(); logger.info("attempting to delete all Solr documents before a complete re-index"); solrClientService.getSolrClient().deleteByQuery("*:*"); - solrClientService.getSolrClient().commit(); int numRowsAffected = dvObjectService.clearAllIndexTimes(); response.add(numRowsClearedByClearAllIndexTimes, numRowsAffected); response.add(messageString, "Solr index and database index timestamps cleared."); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java index 893099ff08d..27e79cb1fc2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrQueryResponse.java @@ -26,6 +26,7 @@ public class SolrQueryResponse { private String error; private Map dvObjectCounts = new HashMap<>(); private Map publicationStatusCounts = new HashMap<>(); + private boolean solrTemporarilyUnavailable = false; public static String DATAVERSES_COUNT_KEY = "dataverses_count"; public static String DATASETS_COUNT_KEY = "datasets_count"; @@ -91,7 +92,14 @@ public JsonObjectBuilder getPublicationStatusCountsAsJSON(){ } return this.getMapCountsAsJSON(publicationStatusCounts); } - + + public boolean isSolrTemporarilyUnavailable() { + return solrTemporarilyUnavailable; + } + + public void setSolrTemporarilyUnavailable(boolean solrTemporarilyUnavailable) { + this.solrTemporarilyUnavailable = solrTemporarilyUnavailable; + } public JsonObjectBuilder getDvObjectCountsAsJSON(){ diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java index 6ad7f9dbbf6..e84c8f133da 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java @@ -7,8 +7,10 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.Predicate; import java.util.logging.Logger; +import edu.harvard.iq.dataverse.*; import jakarta.json.Json; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; @@ -16,22 +18,14 @@ import org.apache.commons.collections4.CollectionUtils; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetRelPublication; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.DvObject; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.MetadataBlock; import 
edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.dataset.DatasetThumbnail; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.DateUtil; import edu.harvard.iq.dataverse.util.json.JsonPrinter; import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; public class SolrSearchResult { - private static final Logger logger = Logger.getLogger(SolrSearchResult.class.getCanonicalName()); private String id; @@ -123,6 +117,8 @@ public class SolrSearchResult { private Long embargoEndDate; + private Long retentionEndDate; + private boolean datasetValid; public String getDvTree() { @@ -403,21 +399,12 @@ public JsonArrayBuilder getRelevance() { return matchedFieldsArray; } - public JsonObject toJsonObject(boolean showRelevance, boolean showEntityIds, boolean showApiUrls) { - return toJsonObject(showRelevance, showEntityIds, showApiUrls, null); - } - - public JsonObject toJsonObject(boolean showRelevance, boolean showEntityIds, boolean showApiUrls, - List metadataFields) { - return json(showRelevance, showEntityIds, showApiUrls, metadataFields).build(); - } - /** * Add additional fields for the MyData page * * @return */ - public JsonObjectBuilder getJsonForMyData() { + public JsonObjectBuilder getJsonForMyData(boolean isValid) { JsonObjectBuilder myDataJson = json(true, true, true);// boolean showRelevance, boolean showEntityIds, boolean showApiUrls) @@ -425,7 +412,7 @@ public JsonObjectBuilder getJsonForMyData() { .add("is_draft_state", this.isDraftState()).add("is_in_review_state", this.isInReviewState()) .add("is_unpublished_state", this.isUnpublishedState()).add("is_published", this.isPublishedState()) .add("is_deaccesioned", this.isDeaccessionedState()) - .add("is_valid", this.isValid()) + .add("is_valid", isValid) .add("date_to_display_on_card", getDateToDisplayOnCard()); // Add is_deaccessioned attribute, even though MyData currently screens any deaccessioned info out @@ -436,7 +423,7 @@ public JsonObjectBuilder getJsonForMyData() { if ((this.getParent() != null) && (!this.getParent().isEmpty())) { // System.out.println("keys:" + parent.keySet().toString()); - if (this.entity.isInstanceofDataFile()) { + if (this.entity != null && this.entity.isInstanceofDataFile()) { myDataJson.add("parentIdentifier", this.getParent().get(SolrSearchResult.PARENT_IDENTIFIER)) .add("parentName", this.getParent().get("name")); @@ -450,12 +437,10 @@ public JsonObjectBuilder getJsonForMyData() { } // getJsonForMydata public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, boolean showApiUrls) { - return json(showRelevance, showEntityIds, showApiUrls, null); + return json(showRelevance, showEntityIds, showApiUrls, null, null); } - public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, boolean showApiUrls, - List metadataFields) { - + public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, boolean showApiUrls, List metadataFields, Long datasetFileCount) { if (this.type == null) { return jsonObjectBuilder(); } @@ -571,7 +556,7 @@ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, bool subjects.add(subject); } nullSafeJsonBuilder.add("subjects", subjects); - nullSafeJsonBuilder.add("fileCount", dv.getFileMetadatas().size()); + nullSafeJsonBuilder.add("fileCount", datasetFileCount); nullSafeJsonBuilder.add("versionId", dv.getId()); nullSafeJsonBuilder.add("versionState", dv.getVersionState().toString()); if (this.isPublishedState()) { @@ -1261,11 +1246,31 @@ public void 
setEmbargoEndDate(Long embargoEndDate) { this.embargoEndDate = embargoEndDate; } + public Long getRetentionEndDate() { + return retentionEndDate; + } + + public void setRetentionEndDate(Long retentionEndDate) { + this.retentionEndDate = retentionEndDate; + } + public void setDatasetValid(Boolean datasetValid) { this.datasetValid = datasetValid == null || Boolean.valueOf(datasetValid); } - public boolean isValid() { - return datasetValid; + public boolean isValid(Predicate canUpdateDataset) { + if (this.datasetValid) { + return true; + } + if (!this.getType().equals("datasets")) { + return true; + } + if (this.isDraftState()) { + return false; + } + if (!JvmSettings.UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED.lookupOptional(Boolean.class).orElse(true)) { + return true; + } + return !canUpdateDataset.test(this); + } }
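The rewritten isValid above is worth a usage sketch. This is illustrative only: PermissionsWrapperLike and showValidityBadge are hypothetical stand-ins, not types introduced by this change; only SolrSearchResult.isValid(Predicate) and the dataverse.ui.show-validity-label-when-published option come from the diff.

```java
import java.util.function.Predicate;

// Sketch only; assumes it lives alongside edu.harvard.iq.dataverse.search.SolrSearchResult.
// PermissionsWrapperLike stands in for whatever component can answer
// "may the current user update this dataset?" (an assumption, not a class in this change).
interface PermissionsWrapperLike {
    boolean canCurrentUserUpdateDataset(SolrSearchResult result);
}

class ValidityLabelSketch {
    static boolean showValidityBadge(SolrSearchResult result, PermissionsWrapperLike perms) {
        Predicate<SolrSearchResult> canUpdate = perms::canCurrentUserUpdateDataset;
        // Per isValid(...) above: invalid drafts always read as invalid; invalid
        // published datasets read as invalid only for users who can update them,
        // and only while dataverse.ui.show-validity-label-when-published
        // (default true) is enabled.
        return !result.isValid(canUpdate);
    }
}
```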
diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java b/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java index a2c3f53d59d..96222f40daf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/ConfigCheckService.java @@ -1,25 +1,40 @@ package edu.harvard.iq.dataverse.settings; +import edu.harvard.iq.dataverse.MailServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; import edu.harvard.iq.dataverse.util.FileUtil; - +import edu.harvard.iq.dataverse.util.MailSessionProducer; import jakarta.annotation.PostConstruct; import jakarta.ejb.DependsOn; import jakarta.ejb.Singleton; import jakarta.ejb.Startup; +import jakarta.inject.Inject; +import jakarta.mail.internet.InternetAddress; + import java.io.IOException; import java.nio.file.FileSystemException; import java.nio.file.Files; import java.nio.file.Path; import java.util.Map; +import java.util.Optional; import java.util.logging.Level; import java.util.logging.Logger; @Startup @Singleton -@DependsOn("StartupFlywayMigrator") +@DependsOn({"StartupFlywayMigrator", "PidProviderFactoryBean"}) public class ConfigCheckService { private static final Logger logger = Logger.getLogger(ConfigCheckService.class.getCanonicalName()); + + @Inject + MailSessionProducer mailSessionProducer; + @Inject + MailServiceBean mailService; + @Inject + PidProviderFactoryBean pidProviderFactoryBean; public static class ConfigurationError extends RuntimeException { public ConfigurationError(String message) { @@ -29,11 +44,14 @@ public ConfigurationError(String message) { @PostConstruct public void startup() { - if (!checkSystemDirectories()) { + if (!checkSystemDirectories() || !checkPidProviders()) { throw new ConfigurationError("Not all configuration checks passed successfully. See logs above."); } + + // Only checks resulting in warnings, nothing critical that needs to stop deployment + checkSystemMailSetup(); } - + /** * In this method, we check the existence and write-ability of all important directories we use during * normal operations. It does not include checks for the storage system. If directories are not available, @@ -77,5 +95,56 @@ public boolean checkSystemDirectories() { } return success; } + + /** + * This method is not expected to make a deployment fail, but to send out clear warning messages about missing or + * wrong configuration settings. + */ + public void checkSystemMailSetup() { + // Check if a system mail setting has been provided or issue warning about disabled mail notifications + Optional mailAddress = mailService.getSystemAddress(); + + // Not present -> warning + if (mailAddress.isEmpty()) { + logger.warning("Could not find a system mail setting in database (key: " + Key.SystemEmail + ", deprecated) or JVM option '" + JvmSettings.SYSTEM_EMAIL.getScopedKey() + "'"); + logger.warning("Mail notifications and system messages are deactivated until you provide a configuration"); + } + + // If there is an app server provided mail config, let's determine whether the setup matches + // TODO: when support for appserver provided mail session goes away, this code can be deleted + if (mailSessionProducer.hasSessionFromAppServer()) { + if (mailAddress.isEmpty()) { + logger.warning("Found a mail session provided by app server, but no system mail address (see logs above)"); + // Check if the "from" in the session is the same as the system mail address (see issue 4210) + } else { + String sessionFrom = mailSessionProducer.getSession().getProperty("mail.from"); + if (!mailAddress.get().toString().equals(sessionFrom)) { + logger.warning(() -> String.format( + "The app server mail session 'from' address (%s) does not match the system mail setting (%s)", + sessionFrom, mailAddress.get())); + } + } + } + } + /** + * Verifies that at least one PidProvider capable of editing/minting PIDs is + * configured. Requires the @DependsOn("PidProviderFactoryBean") annotation above, + * since it is the @PostConstruct init() method of that class that loads the PidProviders. + * + * @return True if all checks successful, false otherwise. + */ + private boolean checkPidProviders() { + // Check if at least one PidProvider capable of editing/minting PIDs is configured. + boolean valid = true; + if (PidUtil.getManagedProviderIds().isEmpty()) { + valid = false; + logger.warning("No PID providers configured"); + } + if (pidProviderFactoryBean.getDefaultPidGenerator() == null) { + valid = false; + logger.warning("No default PID provider configured"); + } + return valid; + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index afa5a1c986a..021977ff8c6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -36,6 +36,61 @@ public enum FeatureFlags { * @since Dataverse @TODO: */ API_BEARER_AUTH("api-bearer-auth"), + /** + * For published (public) objects, don't use a join when searching Solr. + * Experimental! Requires a reindex with the following feature flag enabled, + * in order to add the boolean publicObject_b:true field to all the public + * Solr documents. + * + * @apiNote Raise flag by setting + * "dataverse.feature.avoid-expensive-solr-join" + * @since Dataverse 6.3 + */ + AVOID_EXPENSIVE_SOLR_JOIN("avoid-expensive-solr-join"), + /** + * With this flag enabled, the boolean field publicObject_b:true will be + * added to all the indexed Solr documents for publicly-available collections, + * datasets and files. This flag makes it possible to rely on it in searches, + * instead of the very expensive join (the feature flag above). + * + * @apiNote Raise flag by setting + * "dataverse.feature.add-publicobject-solr-field" + * @since Dataverse 6.3 + */ + ADD_PUBLICOBJECT_SOLR_FIELD("add-publicobject-solr-field"),
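Taken together, the two flags above trade the permission join for a plain boolean filter. A minimal SolrJ sketch (the query text is a placeholder, and the exact join clause Dataverse otherwise uses is elided; only the publicObject_b field name comes from the javadoc above):

```java
import org.apache.solr.client.solrj.SolrQuery;

public class PublicObjectFilterSketch {
    // With add-publicobject-solr-field raised at (re)index time, public documents
    // carry publicObject_b:true, so a search restricted to public content can use
    // a cheap filter query instead of the expensive permission join.
    public static SolrQuery publicOnly(String terms) {
        SolrQuery query = new SolrQuery(terms);
        query.addFilterQuery("publicObject_b:true"); // replaces the {!join ...} permission clause
        return query;
    }
}
```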
+ /** + * With this flag set, Dataverse will index the actual origin of harvested + * metadata records, instead of the "Harvested" string in all cases. + * + * @apiNote Raise flag by setting + * "dataverse.feature.index-harvested-metadata-source" + * @since Dataverse 6.3 + */ + INDEX_HARVESTED_METADATA_SOURCE("index-harvested-metadata-source"), + + /** + * Dataverse normally deletes all Solr documents related to a dataset's files + * when the dataset is reindexed. With this flag enabled, additional logic is + * added to the reindex process to delete only the Solr documents that are no + * longer needed. (Required docs will be updated rather than deleted and + * replaced.) Enabling this feature flag should make the reindex process + * faster without impacting the search results. + * + * @apiNote Raise flag by setting + * "dataverse.feature.reduce-solr-deletes" + * @since Dataverse 6.3 + */ + REDUCE_SOLR_DELETES("reduce-solr-deletes"), + /** + * With this flag enabled, the Return To Author pop-up will not have a required + * "Reason" field, and a reason will not be required in the + * /api/datasets/{id}/returnToAuthor api call. + * + * @apiNote Raise flag by setting + * "dataverse.feature.disable-return-to-author-reason" + * @since Dataverse 6.3 + */ + DISABLE_RETURN_TO_AUTHOR_REASON("disable-return-to-author-reason"), ; final String flag; diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index cc3272413c7..9d13be005c9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java @@ -50,6 +50,11 @@ public enum JvmSettings { UPLOADS_DIRECTORY(SCOPE_FILES, "uploads"), DOCROOT_DIRECTORY(SCOPE_FILES, "docroot"), GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"), + GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-cache-maxage"), + + // STORAGE DRIVER SETTINGS + SCOPE_DRIVER(SCOPE_FILES), + DISABLE_S3_TAGGING(SCOPE_DRIVER, "disable-tagging"), // SOLR INDEX SETTINGS SCOPE_SOLR(PREFIX, "solr"), @@ -59,6 +64,10 @@ public enum JvmSettings { SOLR_CORE(SCOPE_SOLR, "core"), SOLR_PATH(SCOPE_SOLR, "path"), + // INDEX CONCURRENCY + SCOPE_SOLR_CONCURRENCY(SCOPE_SOLR, "concurrency"), + MAX_ASYNC_INDEXES(SCOPE_SOLR_CONCURRENCY, "max-async-indexes"), + // RSERVE CONNECTION SCOPE_RSERVE(PREFIX, "rserve"), RSERVE_HOST(SCOPE_RSERVE, "host"), @@ -90,40 +99,116 @@ public enum JvmSettings { // PERSISTENT IDENTIFIER SETTINGS SCOPE_PID(PREFIX, "pid"), - - // PROVIDER EZID (legacy) - these settings were formerly kept together with DataCite ones - SCOPE_PID_EZID(SCOPE_PID, "ezid"), - EZID_API_URL(SCOPE_PID_EZID, "api-url", "doi.baseurlstring"), - EZID_USERNAME(SCOPE_PID_EZID, "username", "doi.username"), - EZID_PASSWORD(SCOPE_PID_EZID, "password", "doi.password"), + PID_PROVIDERS(SCOPE_PID, "providers"), + PID_DEFAULT_PROVIDER(SCOPE_PID, "default-provider"), + SCOPE_PID_PROVIDER(SCOPE_PID), + PID_PROVIDER_TYPE(SCOPE_PID_PROVIDER, "type"), + PID_PROVIDER_LABEL(SCOPE_PID_PROVIDER, "label"), + PID_PROVIDER_AUTHORITY(SCOPE_PID_PROVIDER, "authority"), + PID_PROVIDER_SHOULDER(SCOPE_PID_PROVIDER, "shoulder"), + PID_PROVIDER_IDENTIFIER_GENERATION_STYLE(SCOPE_PID_PROVIDER, "identifier-generation-style"), + PID_PROVIDER_DATAFILE_PID_FORMAT(SCOPE_PID_PROVIDER, "datafile-pid-format"), + PID_PROVIDER_MANAGED_LIST(SCOPE_PID_PROVIDER, "managed-list"), + PID_PROVIDER_EXCLUDED_LIST(SCOPE_PID_PROVIDER, "excluded-list"), + + + // PROVIDER EZID - these settings were formerly kept together with DataCite ones + SCOPE_PID_EZID(SCOPE_PID_PROVIDER, "ezid"), + EZID_API_URL(SCOPE_PID_EZID, "api-url"), + EZID_USERNAME(SCOPE_PID_EZID, "username"), + EZID_PASSWORD(SCOPE_PID_EZID, "password"), // PROVIDER DATACITE - SCOPE_PID_DATACITE(SCOPE_PID, "datacite"), - DATACITE_MDS_API_URL(SCOPE_PID_DATACITE, "mds-api-url", "doi.baseurlstring"), - DATACITE_REST_API_URL(SCOPE_PID_DATACITE, "rest-api-url", "doi.dataciterestapiurlstring", "doi.mdcbaseurlstring"), - DATACITE_USERNAME(SCOPE_PID_DATACITE, "username", "doi.username"), - DATACITE_PASSWORD(SCOPE_PID_DATACITE, "password", "doi.password"), + SCOPE_PID_DATACITE(SCOPE_PID_PROVIDER, "datacite"), + DATACITE_MDS_API_URL(SCOPE_PID_DATACITE, "mds-api-url"), + DATACITE_REST_API_URL(SCOPE_PID_DATACITE, "rest-api-url"), + DATACITE_USERNAME(SCOPE_PID_DATACITE, "username"), + DATACITE_PASSWORD(SCOPE_PID_DATACITE, "password"), // PROVIDER PERMALINK - SCOPE_PID_PERMALINK(SCOPE_PID, "permalink"), - PERMALINK_BASEURL(SCOPE_PID_PERMALINK, "base-url", "perma.baseurlstring"), + SCOPE_PID_PERMALINK(SCOPE_PID_PROVIDER, "permalink"), + PERMALINK_BASE_URL(SCOPE_PID_PERMALINK, "base-url"), + PERMALINK_SEPARATOR(SCOPE_PID_PERMALINK, "separator"), // PROVIDER HANDLE - SCOPE_PID_HANDLENET(SCOPE_PID, "handlenet"), - HANDLENET_INDEX(SCOPE_PID_HANDLENET, "index", "dataverse.handlenet.index"), + SCOPE_PID_HANDLENET(SCOPE_PID_PROVIDER, "handlenet"), + HANDLENET_INDEX(SCOPE_PID_HANDLENET, "index"), + HANDLENET_INDEPENDENT_SERVICE(SCOPE_PID_HANDLENET, "independent-service"), + HANDLENET_AUTH_HANDLE(SCOPE_PID_HANDLENET, "auth-handle"), SCOPE_PID_HANDLENET_KEY(SCOPE_PID_HANDLENET, "key"), - HANDLENET_KEY_PATH(SCOPE_PID_HANDLENET_KEY, "path", "dataverse.handlenet.admcredfile"), - HANDLENET_KEY_PASSPHRASE(SCOPE_PID_HANDLENET_KEY, "passphrase", "dataverse.handlenet.admprivphrase"), + HANDLENET_KEY_PATH(SCOPE_PID_HANDLENET_KEY, "path"), + HANDLENET_KEY_PASSPHRASE(SCOPE_PID_HANDLENET_KEY, "passphrase"), + + /* + * The deprecated legacy settings below are from when you could only have a + * single PIDProvider. They mirror the settings above, but are global, not within + * the SCOPE_PID_PROVIDER of an individual provider.
+ */ + /** + * DEPRECATED PROVIDER DATACITE + * + * @deprecated - legacy single provider setting providing backward compatibility + */ + @Deprecated(forRemoval = true, since = "2024-02-13") + SCOPE_LEGACY_PID_DATACITE(SCOPE_PID, "datacite"), + LEGACY_DATACITE_MDS_API_URL(SCOPE_LEGACY_PID_DATACITE, "mds-api-url", "doi.baseurlstring"), + LEGACY_DATACITE_REST_API_URL(SCOPE_LEGACY_PID_DATACITE, "rest-api-url", "doi.dataciterestapiurlstring", + "doi.mdcbaseurlstring"), + LEGACY_DATACITE_USERNAME(SCOPE_LEGACY_PID_DATACITE, "username", "doi.username"), + LEGACY_DATACITE_PASSWORD(SCOPE_LEGACY_PID_DATACITE, "password", "doi.password"), + + /** + * DEPRECATED PROVIDER EZID + * + * @deprecated - legacy single provider setting providing backward compatibility + */ + @Deprecated(forRemoval = true, since = "2024-02-13") + SCOPE_LEGACY_PID_EZID(SCOPE_PID, "ezid"), LEGACY_EZID_API_URL(SCOPE_LEGACY_PID_EZID, "api-url"), + LEGACY_EZID_USERNAME(SCOPE_LEGACY_PID_EZID, "username"), LEGACY_EZID_PASSWORD(SCOPE_LEGACY_PID_EZID, "password"), + + /** + * DEPRECATED PROVIDER PERMALINK + * + * @deprecated - legacy single provider setting providing backward compatibility + */ + @Deprecated(forRemoval = true, since = "2024-02-13") + SCOPE_LEGACY_PID_PERMALINK(SCOPE_PID, "permalink"), + LEGACY_PERMALINK_BASEURL(SCOPE_LEGACY_PID_PERMALINK, "base-url", "perma.baseurlstring"), + + /** + * DEPRECATED PROVIDER HANDLE + * + * @deprecated - legacy single provider setting providing backward compatibility + */ + @Deprecated(forRemoval = true, since = "2024-02-13") + SCOPE_LEGACY_PID_HANDLENET(SCOPE_PID, "handlenet"), + LEGACY_HANDLENET_INDEX(SCOPE_LEGACY_PID_HANDLENET, "index", "dataverse.handlenet.index"), + @Deprecated(forRemoval = true, since = "2024-02-13") + SCOPE_LEGACY_PID_HANDLENET_KEY(SCOPE_LEGACY_PID_HANDLENET, "key"), + LEGACY_HANDLENET_KEY_PATH(SCOPE_LEGACY_PID_HANDLENET_KEY, "path", "dataverse.handlenet.admcredfile"), + LEGACY_HANDLENET_KEY_PASSPHRASE(SCOPE_LEGACY_PID_HANDLENET_KEY, "passphrase", "dataverse.handlenet.admprivphrase"), // SPI SETTINGS SCOPE_SPI(PREFIX, "spi"), SCOPE_EXPORTERS(SCOPE_SPI, "exporters"), EXPORTERS_DIRECTORY(SCOPE_EXPORTERS, "directory"), + SCOPE_PIDPROVIDERS(SCOPE_SPI, "pidproviders"), + PIDPROVIDERS_DIRECTORY(SCOPE_PIDPROVIDERS, "directory"), // MAIL SETTINGS SCOPE_MAIL(PREFIX, "mail"), + SYSTEM_EMAIL(SCOPE_MAIL, "system-email"), SUPPORT_EMAIL(SCOPE_MAIL, "support-email"), CC_SUPPORT_ON_CONTACT_EMAIL(SCOPE_MAIL, "cc-support-on-contact-email"), + MAIL_DEBUG(SCOPE_MAIL, "debug"), + // Mail Transfer Agent settings + SCOPE_MAIL_MTA(SCOPE_MAIL, "mta"), + MAIL_MTA_AUTH(SCOPE_MAIL_MTA, "auth"), + MAIL_MTA_USER(SCOPE_MAIL_MTA, "user"), + MAIL_MTA_PASSWORD(SCOPE_MAIL_MTA, "password"), + MAIL_MTA_SUPPORT_UTF8(SCOPE_MAIL_MTA, "allow-utf8-addresses"), + // Placeholder setting for a large list of extra settings + MAIL_MTA_SETTING(SCOPE_MAIL_MTA), // AUTH SETTINGS SCOPE_AUTH(PREFIX, "auth"), @@ -145,11 +230,22 @@ public enum JvmSettings { SCOPE_UI(PREFIX, "ui"), UI_ALLOW_REVIEW_INCOMPLETE(SCOPE_UI, "allow-review-for-incomplete"), UI_SHOW_VALIDITY_FILTER(SCOPE_UI, "show-validity-filter"), + UI_SHOW_VALIDITY_LABEL_WHEN_PUBLISHED(SCOPE_UI, "show-validity-label-when-published"), // NetCDF SETTINGS SCOPE_NETCDF(PREFIX, "netcdf"), GEO_EXTRACT_S3_DIRECT_UPLOAD(SCOPE_NETCDF, "geo-extract-s3-direct-upload"), + // BAGIT SETTINGS + SCOPE_BAGIT(PREFIX, "bagit"), + SCOPE_BAGIT_SOURCEORG(SCOPE_BAGIT, "sourceorg"), + BAGIT_SOURCE_ORG_NAME(SCOPE_BAGIT_SOURCEORG, "name"), + 
BAGIT_SOURCEORG_ADDRESS(SCOPE_BAGIT_SOURCEORG, "address"), + BAGIT_SOURCEORG_EMAIL(SCOPE_BAGIT_SOURCEORG, "email"), + + // STORAGE USE SETTINGS + SCOPE_STORAGEUSE(PREFIX, "storageuse"), + STORAGEUSE_DISABLE_UPDATES(SCOPE_STORAGEUSE, "disable-storageuse-increments"), ; private static final String SCOPE_SEPARATOR = "."; diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index 6b74810eb53..8ed96690e84 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -45,6 +45,7 @@ public class SettingsServiceBean { * over your shoulder when typing strings in various places of a large app. * So there. */ + @SuppressWarnings("java:S115") public enum Key { AllowApiTokenLookupViaApi, /** @@ -54,6 +55,10 @@ public enum Key { CustomDatasetSummaryFields, /** * Defines a public installation -- all datafiles are unrestricted + * + * This was added along with CloudEnvironmentName and ComputeBaseUrl. + * See https://github.com/IQSS/dataverse/issues/3776 and + * https://github.com/IQSS/dataverse/pull/3967 */ PublicInstall, /** @@ -74,9 +79,12 @@ public enum Key { /** * For example, https://datacapture.example.org */ + @Deprecated(forRemoval = true, since = "2024-07-07") DataCaptureModuleUrl, + @Deprecated(forRemoval = true, since = "2024-07-07") RepositoryStorageAbstractionLayerUrl, UploadMethods, + @Deprecated(forRemoval = true, since = "2024-07-07") DownloadMethods, /** * If the data replicated around the world using RSAL (Repository @@ -86,7 +94,17 @@ public enum Key { * TODO: Think about if it makes sense to make this a column in the * StorageSite database table. */ + @Deprecated(forRemoval = true, since = "2024-07-07") LocalDataAccessPath, + /** + * The algorithm used to generate PIDs, randomString (default) or + * storedProcedure + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") IdentifierGenerationStyle, OAuth2CallbackUrl, DefaultAuthProvider, @@ -189,24 +207,51 @@ public enum Key { SignUpUrl, /** Key for whether we allow users to sign up */ AllowSignUp, - /** protocol for global id */ + /** + * protocol for global id + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") Protocol, - /** authority for global id */ + /** + * authority for global id + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") Authority, - /** DoiProvider for global id */ + /** + * DoiProvider for global id + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") DoiProvider, - /** Shoulder for global id - used to create a common prefix on identifiers */ + /** + * Shoulder for global id - used to create a common prefix on identifiers + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. 
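+ * For example (hypothetical provider id "datacite1"; the authority and shoulder
+ * values are placeholders), the scoped JVM options replacing this setting look like:
+ * dataverse.pid.providers=datacite1
+ * dataverse.pid.default-provider=datacite1
+ * dataverse.pid.datacite1.type=datacite
+ * dataverse.pid.datacite1.authority=10.5072
+ * dataverse.pid.datacite1.shoulder=FK2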
+ * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") Shoulder, - /* Removed for now - tried to add here but DOI Service Bean didn't like it at start-up - DoiUsername, - DoiPassword, - DoiBaseurlstring, - */ /** Optionally override http://guides.dataverse.org . */ GuidesBaseUrl, CVocConf, + // Default calls per hour for each tier. csv format (30,60,...) + RateLimitingDefaultCapacityTiers, + // json defined list of capacities by tier and action list. See RateLimitSetting.java + RateLimitingCapacityByTierAndAction, /** * A link to an installation of https://github.com/IQSS/miniverse or * some other metrics app. @@ -227,7 +272,12 @@ public enum Key { /* the number of files the GUI user is allowed to upload in one batch, via drag-and-drop, or through the file select dialog */ MultipleUploadFilesLimit, - /* return email address for system emails such as notifications */ + /** + * Return email address for system emails such as notifications + * @deprecated Please replace usages with {@link edu.harvard.iq.dataverse.MailServiceBean#getSystemAddress}, + * which is backward compatible with this setting. + */ + @Deprecated(since = "6.2", forRemoval = true) SystemEmail, /* size limit for Tabular data file ingests */ /* (can be set separately for specific ingestable formats; in which @@ -347,10 +397,16 @@ Whether Harvesting (OAI) service is enabled */ PVCustomPasswordResetAlertMessage, /* - String to describe DOI format for data files. Default is DEPENDENT. - 'DEPENEDENT' means the DOI will be the Dataset DOI plus a file DOI with a slash in between. - 'INDEPENDENT' means a new global id, completely independent from the dataset-level global id. - */ + * String to describe DOI format for data files. Default is DEPENDENT. + * 'DEPENDENT' means the DOI will be the Dataset DOI plus a file DOI with a + * slash in between. 'INDEPENDENT' means a new global id, completely independent + * from the dataset-level global id. + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") DataFilePIDFormat, /* Json array of supported languages */ @@ -358,7 +414,7 @@ Whether Harvesting (OAI) service is enabled /* Number for the minimum number of files to send PID registration to asynchronous workflow */ - PIDAsynchRegFileCount, + //PIDAsynchRegFileCount, /** * */ @@ -366,12 +422,22 @@ Whether Harvesting (OAI) service is enabled /** * Indicates if the Handle service is setup to work 'independently' (No communication with the Global Handle Registry) + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. + * */ + @Deprecated(forRemoval = true, since = "2024-02-13") IndependentHandleService, /** Handle to use for authentication if the default is not being used - */ + * + * @deprecated New installations should not use this database setting, but use + * the settings within {@link JvmSettings#SCOPE_PID}. 
+ * + */ + @Deprecated(forRemoval = true, since = "2024-02-13") HandleAuthHandle, /** @@ -461,18 +527,6 @@ Whether Harvesting (OAI) service is enabled */ ExportInstallationAsDistributorOnlyWhenNotSet, - /** - * Basic Globus Token for Globus Application - */ - GlobusBasicToken, - /** - * GlobusEndpoint is Globus endpoint for Globus application - */ - GlobusEndpoint, - /** - * Comma separated list of Globus enabled stores - */ - GlobusStores, /** Globus App URL * */ @@ -540,6 +594,12 @@ Whether Harvesting (OAI) service is enabled * n: embargo enabled with n months the maximum allowed duration */ MaxEmbargoDurationInMonths, + /** This setting enables Retention capabilities in Dataverse and sets the minimum Retention duration allowed. + * 0 or not set: new retentions disabled + * -1: retention enabled, no time limit + * n: retention enabled with n months the minimum allowed duration + */ + MinRetentionDurationInMonths, /* * Include "Custom Terms" as an item in the license drop-down or not. */ @@ -605,7 +665,20 @@ Whether Harvesting (OAI) service is enabled /* * True/false(default) option deciding whether file PIDs can be enabled per collection - using the Dataverse/collection set attribute API call. */ - AllowEnablingFilePIDsPerCollection + AllowEnablingFilePIDsPerCollection, + /** + * Allows an instance admin to disable Solr search facets on the collection + * and dataset pages instantly + */ + DisableSolrFacets, + DisableSolrFacetsForGuestUsers, + DisableSolrFacetsWithoutJsession, + DisableUncheckedTypesFacet, + /** + * When ingesting tabular data files, store the generated tab-delimited + * files *with* the variable names line up top. + */ + StoreIngestedTabularFilesWithVarHeaders ; @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java b/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java index 86ae697f771..8408e7d91f2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtil.java @@ -1,194 +1,140 @@ package edu.harvard.iq.dataverse.sitemap; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DvObjectContainer; -import edu.harvard.iq.dataverse.settings.ConfigCheckService; -import edu.harvard.iq.dataverse.settings.JvmSettings; -import edu.harvard.iq.dataverse.util.SystemConfig; -import edu.harvard.iq.dataverse.util.xml.XmlValidator; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; -import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.nio.file.StandardCopyOption; -import java.text.SimpleDateFormat; +import java.text.ParseException; +import java.time.format.DateTimeFormatter; import java.util.List; import java.util.logging.Logger; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerConfigurationException; -import javax.xml.transform.TransformerException; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.xml.sax.SAXException; + +import com.redfin.sitemapgenerator.W3CDateFormat; +import 
com.redfin.sitemapgenerator.W3CDateFormat.Pattern; +import com.redfin.sitemapgenerator.WebSitemapGenerator; +import com.redfin.sitemapgenerator.WebSitemapUrl; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DvObjectContainer; +import edu.harvard.iq.dataverse.settings.ConfigCheckService; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.SystemConfig; public class SiteMapUtil { + static final String DATE_PATTERN = "yyyy-MM-dd"; + static final String SITEMAP_FILENAME_STAGED = "sitemap.xml.staged"; + /** @see https://www.sitemaps.org/protocol.html#index */ + static final int SITEMAP_LIMIT = 50000; + private static final Logger logger = Logger.getLogger(SiteMapUtil.class.getCanonicalName()); + private static DateTimeFormatter formatter = DateTimeFormatter.ofPattern(DATE_PATTERN); - static final String SITEMAP_FILENAME_FINAL = "sitemap.xml"; - static final String SITEMAP_FILENAME_STAGED = "sitemap.xml.staged"; - /** - * TODO: Handle more than 50,000 entries in the sitemap. - * - * (As of this writing Harvard Dataverse only has ~3000 dataverses and - * ~30,000 datasets.) - * - * "each Sitemap file that you provide must have no more than 50,000 URLs" - * https://www.sitemaps.org/protocol.html - * - * Consider using a third party library: "One sitemap can contain a maximum - * of 50,000 URLs. (Some sitemaps, like Google News sitemaps, can contain - * only 1,000 URLs.) If you need to put more URLs than that in a sitemap, - * you'll have to use a sitemap index file. Fortunately, WebSitemapGenerator - * can manage the whole thing for you." - * https://github.com/dfabulich/sitemapgen4j - */ public static void updateSiteMap(List dataverses, List datasets) { logger.info("BEGIN updateSiteMap"); - String sitemapPathString = getSitemapPathString(); - String stagedSitemapPathAndFileString = sitemapPathString + File.separator + SITEMAP_FILENAME_STAGED; - String finalSitemapPathAndFileString = sitemapPathString + File.separator + SITEMAP_FILENAME_FINAL; - - Path stagedPath = Paths.get(stagedSitemapPathAndFileString); - if (Files.exists(stagedPath)) { - logger.warning("Unable to update sitemap! The staged file from a previous run already existed. Delete " + stagedSitemapPathAndFileString + " and try again."); + final String dataverseSiteUrl = SystemConfig.getDataverseSiteUrlStatic(); + final String msgErrorFormat = "Problem with %s : %s. The exception is %s"; + final String msgErrorW3CFormat = "%s isn't a valid W3C date time for %s. The exception is %s"; + final String sitemapPathString = getSitemapPathString(); + final String stagedSitemapPathAndFileString = sitemapPathString + File.separator + SITEMAP_FILENAME_STAGED; + final Path stagedSitemapPath = Paths.get(stagedSitemapPathAndFileString); + + if (Files.exists(stagedSitemapPath)) { + logger.warning(String.format( + "Unable to update sitemap! The staged file from a previous run already existed. 
Delete %s and try again.", + stagedSitemapPathAndFileString)); return; } - DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); - DocumentBuilder documentBuilder = null; + final File directory = new File(sitemapPathString); + if (!directory.exists()) { + directory.mkdir(); + } + + // Use DAY pattern (YYYY-MM-DD), local machine timezone + final W3CDateFormat dateFormat = new W3CDateFormat(Pattern.DAY); + WebSitemapGenerator wsg = null; try { - documentBuilder = documentBuilderFactory.newDocumentBuilder(); - } catch (ParserConfigurationException ex) { - logger.warning("Unable to update sitemap! ParserConfigurationException: " + ex.getLocalizedMessage()); + // All sitemap files are in the "sitemap" folder, see the "getSitemapPathString" method. + // But with the pretty-faces configuration, "sitemap.xml" and "sitemap_index.xml" are accessible directly, + // like "https://demo.dataverse.org/sitemap.xml". So "/sitemap/" needs to be added to the "WebSitemapGenerator" + // base URL in order to produce a valid URL for the sitemap location. + wsg = WebSitemapGenerator.builder(dataverseSiteUrl + "/sitemap/", directory).autoValidate(true).dateFormat(dateFormat) + .build(); + } catch (MalformedURLException e) { + logger.warning(String.format(msgErrorFormat, "Dataverse site URL", dataverseSiteUrl, e.getLocalizedMessage())); return; } - Document document = documentBuilder.newDocument(); - - Element urlSet = document.createElement("urlset"); - urlSet.setAttribute("xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9"); - urlSet.setAttribute("xmlns:xhtml", "http://www.w3.org/1999/xhtml"); - document.appendChild(urlSet); for (Dataverse dataverse : dataverses) { if (!dataverse.isReleased()) { continue; } - Element url = document.createElement("url"); - urlSet.appendChild(url); - - Element loc = document.createElement("loc"); - String dataverseAlias = dataverse.getAlias(); - loc.appendChild(document.createTextNode(SystemConfig.getDataverseSiteUrlStatic() + "/dataverse/" + dataverseAlias)); - url.appendChild(loc); - - Element lastmod = document.createElement("lastmod"); - lastmod.appendChild(document.createTextNode(getLastModDate(dataverse))); - url.appendChild(lastmod); + final String dvAlias = dataverse.getAlias(); + final String dataverseUrl = dataverseSiteUrl + "/dataverse/" + dvAlias; + final String lastModDate = getLastModDate(dataverse); + try { + final WebSitemapUrl url = new WebSitemapUrl.Options(dataverseUrl).lastMod(lastModDate).build(); + wsg.addUrl(url); + } catch (MalformedURLException e) { + logger.fine(String.format(msgErrorFormat, "dataverse URL", dataverseUrl, e.getLocalizedMessage())); + } catch (ParseException e) { + logger.fine(String.format(msgErrorW3CFormat, lastModDate, "dataverse alias " + dvAlias, e.getLocalizedMessage())); + } } for (Dataset dataset : datasets) { - if (!dataset.isReleased()) { - continue; - } - if (dataset.isHarvested()) { - continue; - } // The deaccessioned check is last because it has to iterate through dataset versions.
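As an aside, the sitemapgen4j calls used throughout this rewrite condense to a short, runnable sketch (the base URL and output directory are placeholders; lastMod takes the W3C-formatted string and throws ParseException if it cannot be parsed):

```java
import java.io.File;

import com.redfin.sitemapgenerator.W3CDateFormat;
import com.redfin.sitemapgenerator.W3CDateFormat.Pattern;
import com.redfin.sitemapgenerator.WebSitemapGenerator;
import com.redfin.sitemapgenerator.WebSitemapUrl;

public class SitemapSketch {
    public static void main(String[] args) throws Exception {
        // DAY pattern = YYYY-MM-DD, matching the generator configuration above.
        W3CDateFormat dayFormat = new W3CDateFormat(Pattern.DAY);
        WebSitemapGenerator wsg = WebSitemapGenerator
                .builder("https://demo.dataverse.org/sitemap/", new File("/tmp/sitemap"))
                .autoValidate(true).dateFormat(dayFormat).build();
        wsg.addUrl(new WebSitemapUrl.Options("https://demo.dataverse.org/dataverse/root")
                .lastMod("2024-05-01").build());
        wsg.write(); // writes sitemap.xml; writeSitemapsWithIndex() once past 50,000 URLs
    }
}
```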
- if (dataset.isDeaccessioned()) { + if (!dataset.isReleased() || dataset.isHarvested() || dataset.isDeaccessioned()) { continue; } - Element url = document.createElement("url"); - urlSet.appendChild(url); - - Element loc = document.createElement("loc"); - String datasetPid = dataset.getGlobalId().asString(); - loc.appendChild(document.createTextNode(SystemConfig.getDataverseSiteUrlStatic() + "/dataset.xhtml?persistentId=" + datasetPid)); - url.appendChild(loc); - - Element lastmod = document.createElement("lastmod"); - lastmod.appendChild(document.createTextNode(getLastModDate(dataset))); - url.appendChild(lastmod); - } - - TransformerFactory transformerFactory = TransformerFactory.newInstance(); - Transformer transformer = null; - try { - transformer = transformerFactory.newTransformer(); - } catch (TransformerConfigurationException ex) { - logger.warning("Unable to update sitemap! TransformerConfigurationException: " + ex.getLocalizedMessage()); - return; - } - transformer.setOutputProperty(OutputKeys.INDENT, "yes"); - transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); - DOMSource source = new DOMSource(document); - File directory = new File(sitemapPathString); - if (!directory.exists()) { - directory.mkdir(); - } - - boolean debug = false; - if (debug) { - logger.info("Writing sitemap to console/logs"); - StreamResult consoleResult = new StreamResult(System.out); + final String datasetPid = dataset.getGlobalId().asString(); + final String datasetUrl = dataverseSiteUrl + "/dataset.xhtml?persistentId=" + datasetPid; + final String lastModDate = getLastModDate(dataset); try { - transformer.transform(source, consoleResult); - } catch (TransformerException ex) { - logger.warning("Unable to print sitemap to the console: " + ex.getLocalizedMessage()); + final WebSitemapUrl url = new WebSitemapUrl.Options(datasetUrl).lastMod(lastModDate).build(); + wsg.addUrl(url); + } catch (MalformedURLException e) { + logger.fine(String.format(msgErrorFormat, "dataset URL", datasetUrl, e.getLocalizedMessage())); + } catch (ParseException e) { + logger.fine(String.format(msgErrorW3CFormat, lastModDate, "dataset " + datasetPid, e.getLocalizedMessage())); } } - logger.info("Writing staged sitemap to " + stagedSitemapPathAndFileString); - StreamResult result = new StreamResult(new File(stagedSitemapPathAndFileString)); - try { - transformer.transform(source, result); - } catch (TransformerException ex) { - logger.warning("Unable to update sitemap! Unable to write staged sitemap to " + stagedSitemapPathAndFileString + ". TransformerException: " + ex.getLocalizedMessage()); - return; - } - - logger.info("Checking staged sitemap for well-formedness. The staged file is " + stagedSitemapPathAndFileString); + logger.info(String.format("Writing and checking sitemap file into %s", sitemapPathString)); try { - XmlValidator.validateXmlWellFormed(stagedSitemapPathAndFileString); + wsg.write(); + if (dataverses.size() + datasets.size() > SITEMAP_LIMIT) { + wsg.writeSitemapsWithIndex(); + } } catch (Exception ex) { - logger.warning("Unable to update sitemap! Staged sitemap file is not well-formed XML! The exception for " + stagedSitemapPathAndFileString + " is " + ex.getLocalizedMessage()); - return; - } - - logger.info("Checking staged sitemap against XML schema. 
The staged file is " + stagedSitemapPathAndFileString); - URL schemaUrl = null; - try { - schemaUrl = new URL("https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"); - } catch (MalformedURLException ex) { - // This URL is hard coded and it's fine. We should never get MalformedURLException so we just swallow the exception and carry on. - } - try { - XmlValidator.validateXmlSchema(stagedSitemapPathAndFileString, schemaUrl); - } catch (SAXException | IOException ex) { - logger.warning("Unable to update sitemap! Exception caught while checking XML staged file (" + stagedSitemapPathAndFileString + " ) against XML schema: " + ex.getLocalizedMessage()); + final StringBuffer errorMsg = new StringBuffer("Unable to write or validate sitemap! The exception is "); + errorMsg.append(ex.getLocalizedMessage()); + // Append the messages of the nested causes, if any + Throwable cause = ex.getCause(); + // Limit to 5 causes to avoid looping forever + final int causeLimit = 5; + int cpt = 0; + while (cause != null && cpt < causeLimit) { + errorMsg.append(" with cause ").append(cause.getLocalizedMessage()); + cause = cause.getCause(); + cpt = cpt + 1; + } + logger.warning(errorMsg.toString()); return; } - Path finalPath = Paths.get(finalSitemapPathAndFileString); - logger.info("Copying staged sitemap from " + stagedSitemapPathAndFileString + " to " + finalSitemapPathAndFileString); + logger.info(String.format("Remove staged sitemap %s", stagedSitemapPathAndFileString)); try { - Files.move(stagedPath, finalPath, StandardCopyOption.REPLACE_EXISTING); + Files.deleteIfExists(stagedSitemapPath); } catch (IOException ex) { - logger.warning("Unable to update sitemap! Unable to copy staged sitemap from " + stagedSitemapPathAndFileString + " to " + finalSitemapPathAndFileString + ". IOException: " + ex.getLocalizedMessage()); + logger.warning("Unable to delete sitemap staged file! IOException: " + ex.getLocalizedMessage()); return; } @@ -199,12 +145,11 @@ private static String getLastModDate(DvObjectContainer dvObjectContainer) { // TODO: Decide if YYYY-MM-DD is enough. https://www.sitemaps.org/protocol.html // says "The date of last modification of the file. This date should be in W3C Datetime format. // This format allows you to omit the time portion, if desired, and use YYYY-MM-DD." - return new SimpleDateFormat("yyyy-MM-dd").format(dvObjectContainer.getModificationTime()); + return dvObjectContainer.getModificationTime().toLocalDateTime().format(formatter); } public static boolean stageFileExists() { - String sitemapPathString = getSitemapPathString(); - String stagedSitemapPathAndFileString = sitemapPathString + File.separator + SITEMAP_FILENAME_STAGED; + String stagedSitemapPathAndFileString = getSitemapPathString() + File.separator + SITEMAP_FILENAME_STAGED; Path stagedPath = Paths.get(stagedSitemapPathAndFileString); if (Files.exists(stagedPath)) { logger.warning("Unable to update sitemap! The staged file from a previous run already existed. Delete " + stagedSitemapPathAndFileString + " and try again."); @@ -212,7 +157,7 @@ public static boolean stageFileExists() { } return false; } - + /** * Lookup the location where to generate the sitemap.
* @@ -223,6 +168,6 @@ public static boolean stageFileExists() { */ private static String getSitemapPathString() { return JvmSettings.DOCROOT_DIRECTORY.lookup() + File.separator + "sitemap"; - } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java new file mode 100644 index 00000000000..d00f7041e61 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageQuota.java @@ -0,0 +1,116 @@ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.DvObject; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.OneToOne; +import java.io.Serializable; +import java.util.logging.Logger; + +//import jakarta.persistence.*; + +/** + * + * @author landreev + * + */ +@Entity +public class StorageQuota implements Serializable { + private static final Logger logger = Logger.getLogger(StorageQuota.class.getCanonicalName()); + + /** + * Only Collection quotas are supported, for now + */ + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + /** + * For defining quotas for Users and/or Groups + * (Not supported as of yet) + + @Column(nullable = true) + private String assigneeIdentifier; + */ + + /** + * Could be changed to ManyToOne - if we wanted to be able to define separate + * quotas on the same collection for different users. (?) + * Whether we actually want to support the above is TBD. (possibly not) + * Only collection-wide quotas are supported for now. + */ + @OneToOne + @JoinColumn(name="definitionPoint_id", nullable=true) + private DvObject definitionPoint; + + @Column(nullable = true) + private Long allocation; + + public StorageQuota() {} + + /** + * Could be uncommented if/when we want to add per-user quotas (see above) + public String getAssigneeIdentifier() { + return assigneeIdentifier; + } + + public void setAssigneeIdentifier(String assigneeIdentifier) { + this.assigneeIdentifier = assigneeIdentifier; + }*/ + + public DvObject getDefinitionPoint() { + return definitionPoint; + } + + public void setDefinitionPoint(DvObject definitionPoint) { + this.definitionPoint = definitionPoint; + } + + public Long getAllocation() { + return allocation; + } + + public void setAllocation(Long allocation) { + this.allocation = allocation; + } + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? 
id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof StorageQuota)) { + return false; + } + StorageQuota other = (StorageQuota) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.storageuse.StorageQuota[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java new file mode 100644 index 00000000000..b777736dc8d --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUse.java @@ -0,0 +1,100 @@ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.DvObjectContainer; +import jakarta.persistence.Column; +import jakarta.persistence.Entity; +import jakarta.persistence.GenerationType; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.Id; +import jakarta.persistence.Index; +import jakarta.persistence.JoinColumn; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import jakarta.persistence.OneToOne; +import jakarta.persistence.Table; +import java.io.Serializable; + +/** + * + * @author landreev + */ +@NamedQueries({ + @NamedQuery(name = "StorageUse.findByteSizeByDvContainerId",query = "SELECT su.sizeInBytes FROM StorageUse su WHERE su.dvObjectContainer.id =:dvObjectId "), + @NamedQuery(name = "StorageUse.findByDvContainerId",query = "SELECT su FROM StorageUse su WHERE su.dvObjectContainer.id =:dvObjectId "), + @NamedQuery(name = "StorageUse.incrementByteSizeByDvContainerId", query = "UPDATE StorageUse su SET su.sizeInBytes = su.sizeInBytes +:fileSize WHERE su.dvObjectContainer.id =:dvObjectId") +}) +@Entity +@Table(indexes = {@Index(columnList="dvobjectcontainer_id")}) +public class StorageUse implements Serializable { + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + @OneToOne + @JoinColumn(nullable=false) + private DvObject dvObjectContainer; + + @Column + private Long sizeInBytes = null; + + public StorageUse() {} + + public StorageUse(DvObjectContainer dvObjectContainer) { + this(dvObjectContainer, 0L); + } + + public StorageUse(DvObjectContainer dvObjectContainer, Long sizeInBytes) { + this.dvObjectContainer = dvObjectContainer; + this.sizeInBytes = sizeInBytes; + } + + public Long getSizeInBytes() { + return sizeInBytes; + } + + public void setSizeInBytes(Long sizeInBytes) { + this.sizeInBytes = sizeInBytes; + } + + public void incrementSizeInBytes(Long sizeInBytes) { + this.sizeInBytes += sizeInBytes; + } + + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? 
id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof StorageUse)) { + return false; + } + StorageUse other = (StorageUse) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.storageuse.StorageUse[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java new file mode 100644 index 00000000000..7aea7a7b596 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/StorageUseServiceBean.java @@ -0,0 +1,72 @@ +package edu.harvard.iq.dataverse.storageuse; + +import edu.harvard.iq.dataverse.settings.JvmSettings; +import jakarta.ejb.Stateless; +import jakarta.ejb.TransactionAttribute; +import jakarta.ejb.TransactionAttributeType; +import jakarta.inject.Named; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; +import java.util.Optional; +import java.util.logging.Logger; + +/** + * + * @author landreev + */ +@Stateless +@Named +public class StorageUseServiceBean implements java.io.Serializable { + private static final Logger logger = Logger.getLogger(StorageUseServiceBean.class.getCanonicalName()); + + @PersistenceContext(unitName = "VDCNet-ejbPU") + private EntityManager em; + + public StorageUse findByDvContainerId(Long dvObjectId) { + return em.createNamedQuery("StorageUse.findByDvContainerId", StorageUse.class).setParameter("dvObjectId", dvObjectId).getSingleResult(); + } + + /** + * Looks up the current storage use size, using a named query in a new + * transaction + * @param dvObjectId + * @return + */ + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public Long findStorageSizeByDvContainerId(Long dvObjectId) { + Long res = em.createNamedQuery("StorageUse.findByteSizeByDvContainerId", Long.class).setParameter("dvObjectId", dvObjectId).getSingleResult(); + return res == null ? 0L : res; + } + + /** + * Increments the recorded storage size for all the dvobject parents of a + * datafile, recursively. + * @param dvObjectContainerId database id of the immediate parent (dataset) + * @param increment size in bytes of the file(s) being added + */ + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public void incrementStorageSizeRecursively(Long dvObjectContainerId, Long increment) { + if (dvObjectContainerId != null && increment != null) { + Optional allow = JvmSettings.STORAGEUSE_DISABLE_UPDATES.lookupOptional(Boolean.class); + if (!(allow.isPresent() && allow.get())) { + String queryString = "WITH RECURSIVE uptree (id, owner_id) AS\n" + + "(" + + " SELECT id, owner_id\n" + + " FROM dvobject\n" + + " WHERE id=" + dvObjectContainerId + "\n" + + " UNION ALL\n" + + " SELECT dvobject.id, dvobject.owner_id\n" + + " FROM dvobject\n" + + " JOIN uptree ON dvobject.id = uptree.owner_id)\n" + + "UPDATE storageuse SET sizeinbytes=COALESCE(sizeinbytes,0)+" + increment + "\n" + + "FROM uptree\n" + + "WHERE dvobjectcontainer_id = uptree.id;"; + + int parentsUpdated = em.createNativeQuery(queryString).executeUpdate(); + } + } + // @todo throw an exception if the number of parent dvobjects updated by + // the query is < 2 - ? 
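+ // Worked example (hypothetical ids): a file uploaded to dataset id=12, owned by
+ // collection id=3, which the root collection id=1 owns, makes the recursive
+ // "uptree" CTE resolve to rows {12, 3, 1}; the single UPDATE above then
+ // increments the storageuse rows of all three containers in one statement.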
+ } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java b/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java new file mode 100644 index 00000000000..f7dac52e886 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/storageuse/UploadSessionQuotaLimit.java @@ -0,0 +1,38 @@ +package edu.harvard.iq.dataverse.storageuse; + +/** + * + * @author landreev + */ +public class UploadSessionQuotaLimit { + private Long totalAllocatedInBytes = 0L; + private Long totalUsageInBytes = 0L; + + public UploadSessionQuotaLimit(Long allocated, Long used) { + this.totalAllocatedInBytes = allocated; + this.totalUsageInBytes = used; + } + + public Long getTotalAllocatedInBytes() { + return totalAllocatedInBytes; + } + + public void setTotalAllocatedInBytes(Long totalAllocatedInBytes) { + this.totalAllocatedInBytes = totalAllocatedInBytes; + } + + public Long getTotalUsageInBytes() { + return totalUsageInBytes; + } + + public void setTotalUsageInBytes(Long totalUsageInBytes) { + this.totalUsageInBytes = totalUsageInBytes; + } + + public Long getRemainingQuotaInBytes() { + if (totalUsageInBytes > totalAllocatedInBytes) { + return 0L; + } + return totalAllocatedInBytes - totalUsageInBytes; + } + } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index 1ad389fb0e2..a0c32d5c8ce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -21,14 +21,8 @@ package edu.harvard.iq.dataverse.util; -import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.DataFile.ChecksumType; -import edu.harvard.iq.dataverse.DataFileServiceBean; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.Embargo; -import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.S3AccessIO; @@ -86,6 +80,7 @@ import java.util.HashMap; import java.util.List; import java.util.Optional; +import java.util.ResourceBundle; import java.util.UUID; import java.util.logging.Level; import java.util.logging.Logger; @@ -182,6 +177,7 @@ public class FileUtil implements java.io.Serializable { public static final String MIME_TYPE_NETCDF = "application/netcdf"; public static final String MIME_TYPE_XNETCDF = "application/x-netcdf"; public static final String MIME_TYPE_HDF5 = "application/x-hdf5"; + public static final String MIME_TYPE_RO_CRATE = "application/ld+json; profile=\"http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate\""; // File type "thumbnail classes" tags: @@ -278,6 +274,11 @@ public static String getUserFriendlyFileType(DataFile dataFile) { if (fileType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE)){ return ShapefileHandler.SHAPEFILE_FILE_TYPE_FRIENDLY_NAME; } + try { + return BundleUtil.getStringFromPropertyFile(fileType,"MimeTypeDisplay" ); + } catch (MissingResourceException e) { + //NOOP: we will try again after trimming ";" + } if (fileType.contains(";")) { fileType = fileType.substring(0, fileType.indexOf(";")); } @@ -292,6 +293,11 @@ public static String getUserFriendlyFileType(DataFile dataFile) { } public 
static String getIndexableFacetFileType(DataFile dataFile) { + try { + return BundleUtil.getStringFromDefaultPropertyFile(dataFile.getContentType(),"MimeTypeFacets" ); + } catch (MissingResourceException e) { + //NOOP: we will try again after trimming ";" + } String fileType = getFileType(dataFile); try { return BundleUtil.getStringFromDefaultPropertyFile(fileType,"MimeTypeFacets" ); @@ -421,7 +427,10 @@ public static String retestIngestableFileType(File file, String fileType) { } public static String determineFileType(File f, String fileName) throws IOException{ - String fileType = null; + String fileType = lookupFileTypeByFileName(fileName); + if (fileType != null) { + return fileType; + } String fileExtension = getFileExtension(fileName); @@ -480,17 +489,17 @@ public static String determineFileType(File f, String fileName) throws IOExcepti if (fileType != null && fileType.startsWith("text/plain") && STATISTICAL_FILE_EXTENSION.containsKey(fileExtension)) { fileType = STATISTICAL_FILE_EXTENSION.get(fileExtension); } else { - fileType = determineFileTypeByNameAndExtension(fileName); + fileType = lookupFileTypeByExtension(fileName); } logger.fine("mime type recognized by extension: "+fileType); } } else { logger.fine("fileExtension is null"); - String fileTypeByName = lookupFileTypeFromPropertiesFile(fileName); - if(!StringUtil.isEmpty(fileTypeByName)) { - logger.fine(String.format("mime type: %s recognized by filename: %s", fileTypeByName, fileName)); - fileType = fileTypeByName; + final String fileTypeByExtension = lookupFileTypeByExtensionFromPropertiesFile(fileName); + if(!StringUtil.isEmpty(fileTypeByExtension)) { + logger.fine(String.format("mime type: %s recognized by extension: %s", fileTypeByExtension, fileName)); + fileType = fileTypeByExtension; } } @@ -501,24 +510,15 @@ public static String determineFileType(File f, String fileName) throws IOExcepti if ("application/x-gzip".equals(fileType)) { logger.fine("we'll run additional checks on this gzipped file."); - // We want to be able to support gzipped FITS files, same way as - // if they were just regular FITS files: - FileInputStream gzippedIn = new FileInputStream(f); - // (new FileInputStream() can throw a "filen not found" exception; - // however, if we've made it this far, it really means that the - // file does exist and can be opened) - InputStream uncompressedIn = null; - try { - uncompressedIn = new GZIPInputStream(gzippedIn); + try (FileInputStream gzippedIn = new FileInputStream(f); + InputStream uncompressedIn = new GZIPInputStream(gzippedIn)) { if (isFITSFile(uncompressedIn)) { fileType = "application/fits-gzipped"; } } catch (IOException ioex) { - if (uncompressedIn != null) { - try {uncompressedIn.close();} catch (IOException e) {} - } + logger.warning("IOException while processing gzipped FITS file: " + ioex.getMessage()); } - } + } if ("application/zip".equals(fileType)) { // Is this a zipped Shapefile? 
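The refactored lookup order is easiest to see from the public entry point. A small sketch (the returned MIME types are assumptions that depend on the MimeTypeDetectionByFileName and MimeTypeDetectionByFileExtension property files shipped with the installation):

```java
import edu.harvard.iq.dataverse.util.FileUtil;

class MimeLookupSketch {
    static void demo() {
        // 1. an exact file-name match wins (e.g. a MimeTypeDetectionByFileName entry),
        // 2. otherwise the extension map is consulted,
        // 3. falling back to MimeTypeDetectionByFileExtension when the map only
        //    yields application/octet-stream.
        String byName = FileUtil.determineFileTypeByNameAndExtension("Makefile");
        String byExtension = FileUtil.determineFileTypeByNameAndExtension("data.csv");
        System.out.println(byName + " / " + byExtension);
    }
}
```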
@@ -544,33 +544,41 @@ public static String determineFileType(File f, String fileName) throws IOExcepti return fileType; } - public static String determineFileTypeByNameAndExtension(String fileName) { - String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName); + public static String determineFileTypeByNameAndExtension(final String fileName) { + final String fileType = lookupFileTypeByFileName(fileName); + if (fileType != null) { + return fileType; + } + return lookupFileTypeByExtension(fileName); + } + + private static String lookupFileTypeByExtension(final String fileName) { + final String mimetypesFileTypeMapResult = MIME_TYPE_MAP.getContentType(fileName); logger.fine("MimetypesFileTypeMap type by extension, for " + fileName + ": " + mimetypesFileTypeMapResult); - if (mimetypesFileTypeMapResult != null) { - if ("application/octet-stream".equals(mimetypesFileTypeMapResult)) { - return lookupFileTypeFromPropertiesFile(fileName); - } else { - return mimetypesFileTypeMapResult; - } - } else { + if (mimetypesFileTypeMapResult == null) { return null; } + if ("application/octet-stream".equals(mimetypesFileTypeMapResult)) { + return lookupFileTypeByExtensionFromPropertiesFile(fileName); + } + return mimetypesFileTypeMapResult; } - public static String lookupFileTypeFromPropertiesFile(String fileName) { - String fileKey = FilenameUtils.getExtension(fileName); - String propertyFileName = "MimeTypeDetectionByFileExtension"; - if(fileKey == null || fileKey.isEmpty()) { - fileKey = fileName; - propertyFileName = "MimeTypeDetectionByFileName"; + private static String lookupFileTypeByFileName(final String fileName) { + return lookupFileTypeFromPropertiesFile("MimeTypeDetectionByFileName", fileName); + } - } - String propertyFileNameOnDisk = propertyFileName + ".properties"; + private static String lookupFileTypeByExtensionFromPropertiesFile(final String fileName) { + final String fileKey = FilenameUtils.getExtension(fileName); + return lookupFileTypeFromPropertiesFile("MimeTypeDetectionByFileExtension", fileKey); + } + + private static String lookupFileTypeFromPropertiesFile(final String propertyFileName, final String fileKey) { + final String propertyFileNameOnDisk = propertyFileName + ".properties"; try { logger.fine("checking " + propertyFileNameOnDisk + " for file key " + fileKey); return BundleUtil.getStringFromPropertyFile(fileKey, propertyFileName); - } catch (MissingResourceException ex) { + } catch (final MissingResourceException ex) { logger.info(fileKey + " is a filename/extension Dataverse doesn't know about. Consider adding it to the " + propertyFileNameOnDisk + " file."); return null; } @@ -825,7 +833,8 @@ public static boolean useRecognizedType(String suppliedContentType, String recog || canIngestAsTabular(recognizedType) || recognizedType.equals("application/fits-gzipped") || recognizedType.equalsIgnoreCase(ShapefileHandler.SHAPEFILE_FILE_TYPE) || recognizedType.equalsIgnoreCase(BagItFileHandler.FILE_TYPE) - || recognizedType.equals(MIME_TYPE_ZIP)) { + || recognizedType.equals(MIME_TYPE_ZIP) + || recognizedType.equals(MIME_TYPE_RO_CRATE)) { return true; } return false; @@ -1199,34 +1208,12 @@ public static boolean isGuestbookPopupRequired(DatasetVersion datasetVersion) { } public static boolean isTermsPopupRequired(DatasetVersion datasetVersion) { - - if (datasetVersion == null) { - logger.fine("TermsPopup not required because datasetVersion is null."); - return false; - } - //0. 
if version is draft then Popup "not required" - if (!datasetVersion.isReleased()) { - logger.fine("TermsPopup not required because datasetVersion has not been released."); + Boolean answer = popupDueToStateOrTerms(datasetVersion); + if(answer == null) { + logger.fine("TermsPopup is not required."); return false; } - // 1. License and Terms of Use: - if (datasetVersion.getTermsOfUseAndAccess() != null) { - if (!License.CC0.equals(datasetVersion.getTermsOfUseAndAccess().getLicense()) - && !(datasetVersion.getTermsOfUseAndAccess().getTermsOfUse() == null - || datasetVersion.getTermsOfUseAndAccess().getTermsOfUse().equals(""))) { - logger.fine("TermsPopup required because of license or terms of use."); - return true; - } - - // 2. Terms of Access: - if (!(datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess() == null) && !datasetVersion.getTermsOfUseAndAccess().getTermsOfAccess().equals("")) { - logger.fine("TermsPopup required because of terms of access."); - return true; - } - } - - logger.fine("TermsPopup is not required."); - return false; + return answer; } /** @@ -1245,6 +1232,9 @@ public static boolean isPubliclyDownloadable(FileMetadata fileMetadata) { if (isActivelyEmbargoed(fileMetadata)) { return false; } + if (isRetentionExpired(fileMetadata)) { + return false; + } boolean popupReasons = isDownloadPopupRequired(fileMetadata.getDatasetVersion()); if (popupReasons == true) { /** @@ -1449,6 +1439,17 @@ public static S3AccessIO getS3AccessForDirectUpload(Dataset dataset) { return s3io; } + private static InputStream getOriginalFileInputStream(StorageIO storage, boolean isTabularData) throws IOException { + storage.open(DataAccessOption.READ_ACCESS); + if (!isTabularData) { + return storage.getInputStream(); + } else { + // if this is a tabular file, read the preserved original "auxiliary file" + // instead: + return storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); + } + } + public static void validateDataFileChecksum(DataFile dataFile) throws IOException { DataFile.ChecksumType checksumType = dataFile.getChecksumType(); if (checksumType == null) { @@ -1458,35 +1459,24 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio } StorageIO storage = dataFile.getStorageIO(); - InputStream in = null; - - try { - storage.open(DataAccessOption.READ_ACCESS); + String recalculatedChecksum = null; - if (!dataFile.isTabularData()) { - in = storage.getInputStream(); - } else { - // if this is a tabular file, read the preserved original "auxiliary file" - // instead: - in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); - } + try (InputStream inputStream = getOriginalFileInputStream(storage, dataFile.isTabularData())) { + recalculatedChecksum = FileUtil.calculateChecksum(inputStream, checksumType); } catch (IOException ioex) { - in = null; - } - - if (in == null) { String info = BundleUtil.getStringFromBundle("dataset.publish.file.validation.error.failRead", Arrays.asList(dataFile.getId().toString())); logger.log(Level.INFO, info); throw new IOException(info); - } - - String recalculatedChecksum = null; - try { - recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); } catch (RuntimeException rte) { + logger.log(Level.SEVERE, "failed to calculate checksum, will retry once", rte); recalculatedChecksum = null; - } finally { - IOUtils.closeQuietly(in); + } + + if (recalculatedChecksum == null) { //retry once + storage = dataFile.getStorageIO(); + try (InputStream inputStream =
getOriginalFileInputStream(storage, dataFile.isTabularData())) { + recalculatedChecksum = FileUtil.calculateChecksum(inputStream, checksumType); + } } if (recalculatedChecksum == null) { @@ -1504,19 +1494,12 @@ public static void validateDataFileChecksum(DataFile dataFile) throws IOExceptio boolean fixed = false; if (!dataFile.isTabularData() && dataFile.getIngestReport() != null) { // try again, see if the .orig file happens to be there: - try { - in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION); - } catch (IOException ioex) { - in = null; + try (InputStream in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION)) { + recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); + } catch (RuntimeException rte) { + recalculatedChecksum = null; } - if (in != null) { - try { - recalculatedChecksum = FileUtil.calculateChecksum(in, checksumType); - } catch (RuntimeException rte) { - recalculatedChecksum = null; - } finally { - IOUtils.closeQuietly(in); - } + if (recalculatedChecksum != null) { // try again: if (recalculatedChecksum.equals(dataFile.getChecksumValue())) { fixed = true; @@ -1804,5 +1787,42 @@ public static boolean isActivelyEmbargoed(List fmdList) { } return false; } - + + public static boolean isRetentionExpired(DataFile df) { + Retention e = df.getRetention(); + if (e != null) { + LocalDate endDate = e.getDateUnavailable(); + if (endDate != null && endDate.isBefore(LocalDate.now())) { + return true; + } + } + return false; + } + + public static boolean isRetentionExpired(FileMetadata fileMetadata) { + return isRetentionExpired(fileMetadata.getDataFile()); + } + + public static boolean isRetentionExpired(List fmdList) { + for (FileMetadata fmd : fmdList) { + if (isRetentionExpired(fmd)) { + return true; + } + } + return false; + } + + public static String getStorageDriver(DataFile dataFile) { + String storageIdentifier = dataFile.getStorageIdentifier(); + return storageIdentifier.substring(0, storageIdentifier.indexOf(DataAccess.SEPARATOR)); + } + + /** + * Replace spaces with "_" and remove invalid chars + * @param fileNameIn - Name before sanitization NOTE: not full path since this method removes '/' and '\' + * @return filename without spaces or invalid chars + */ + public static String sanitizeFileName(String fileNameIn) { + return fileNameIn == null ? 
null : fileNameIn.replace(' ', '_').replaceAll("[\\\\/:*?\"<>|,;]", ""); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MailSessionProducer.java b/src/main/java/edu/harvard/iq/dataverse/util/MailSessionProducer.java new file mode 100644 index 00000000000..149f92761d2 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/MailSessionProducer.java @@ -0,0 +1,150 @@ +package edu.harvard.iq.dataverse.util; + +import edu.harvard.iq.dataverse.settings.JvmSettings; +import jakarta.annotation.Resource; +import jakarta.enterprise.context.ApplicationScoped; +import jakarta.enterprise.inject.Produces; +import jakarta.inject.Named; +import jakarta.mail.Authenticator; +import jakarta.mail.PasswordAuthentication; +import jakarta.mail.Session; + +import javax.naming.Context; +import javax.naming.InitialContext; +import javax.naming.NamingException; +import java.util.List; +import java.util.Properties; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +@ApplicationScoped +public class MailSessionProducer { + + // NOTE: We do not allow "from" here, as we want the transport to get it from the message being sent, enabling + // matching addresses. If "from" in transport and "from" in the message differ, some MTAs may reject or + // classify as spam. + // NOTE: Complete list including descriptions at https://eclipse-ee4j.github.io/angus-mail/docs/api/org.eclipse.angus.mail/org/eclipse/angus/mail/smtp/package-summary.html + static final List smtpStringProps = List.of( + "host", "localhost", "localaddress", "auth.mechanisms", "auth.ntlm.domain", "submitter", "dsn.notify", "dsn.ret", + "sasl.mechanisms", "sasl.authorizationid", "sasl.realm", "ssl.trust", "ssl.protocols", "ssl.ciphersuites", + "proxy.host", "proxy.port", "proxy.user", "proxy.password", "socks.host", "socks.port", "mailextension" + ); + static final List smtpIntProps = List.of( + "port", "connectiontimeout", "timeout", "writetimeout", "localport", "auth.ntlm.flag" + ); + static final List smtpBoolProps = List.of( + "auth", "ehlo", "auth.login.disable", "auth.plain.disable", "auth.digest-md5.disable", "auth.ntlm.disable", + "auth.xoauth2.disable", "allow8bitmime", "sendpartial", "sasl.enable", "sasl.usecanonicalhostname", + "quitwait", "quitonsessionreject", "ssl.enable", "ssl.checkserveridentity", "starttls.enable", + "starttls.required", "userset", "noop.strict" + ); + + private static final String PREFIX = "mail.smtp."; + private static final Logger logger = Logger.getLogger(MailSessionProducer.class.getCanonicalName()); + + static { + if (Boolean.TRUE.equals(JvmSettings.MAIL_DEBUG.lookup(Boolean.class))) { + logger.setLevel(Level.FINE); + } + } + + Session systemMailSession; + + /** + * Cache the application server provided (user defined) javamail resource to enable backwards compatibility. + * No direct JNDI lookup on the field to avoid deployment failures when not present. + * @deprecated This should be removed with the next major release of Dataverse, as it would be a breaking change. + */ + @Deprecated(forRemoval = true, since = "6.2") + Session appserverProvidedSession; + + public MailSessionProducer() { + try { + // Do JNDI lookup of legacy mail session programmatically to avoid deployment errors when not found. 
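The constructor code just below does the legacy lookup programmatically rather than injecting it with @Resource, so a missing mail/notifyMailSession resource surfaces as a catchable NamingException instead of a deployment failure. A self-contained sketch of that pattern (the JNDI name is the one used here; the class itself is illustrative):

import jakarta.mail.Session;
import javax.naming.InitialContext;
import javax.naming.NamingException;

class JndiLookupSketch {
    static Session lookupLegacySession() {
        try {
            return (Session) new InitialContext().lookup("mail/notifyMailSession");
        } catch (NamingException e) {
            return null; // no appserver-provided session; caller falls back to JVM settings
        }
    }
}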
+ Context initialContext = new InitialContext(); + this.appserverProvidedSession = (Session)initialContext.lookup("mail/notifyMailSession"); + } catch (NamingException e) { + // This exception simply means the appserver did not provide the legacy mail session. + // Debug level output is just fine. + logger.log(Level.FINER, "Error during legacy appserver-level mail resource lookup", e); + } + } + + @Produces + @Named("mail/systemSession") + public Session getSession() { + // For backward compatibility, prefer to return the mail resource configured on the appserver. + if (appserverProvidedSession != null) { + logger.warning("The configuration of mail transfer agents using asadmin create-javamail-resource is" + + " deprecated. Please migrate to using JVM options, see Dataverse guides for details"); + return appserverProvidedSession; + } + + if (systemMailSession == null) { + logger.fine("Setting up new mail session"); + + // Initialize with null (= no authenticator) is a valid argument for the session factory method. + Authenticator authenticator = null; + + // In case we want auth, create an authenticator (default = false from microprofile-config.properties) + if (Boolean.TRUE.equals(JvmSettings.MAIL_MTA_AUTH.lookup(Boolean.class))) { + logger.fine("Mail Authentication is enabled, building authenticator"); + authenticator = new Authenticator() { + @Override + protected PasswordAuthentication getPasswordAuthentication() { + logger.fine(() -> + String.format("Returning PasswordAuthenticator with username='%s', password='%s'", + JvmSettings.MAIL_MTA_USER.lookup(), + "*".repeat(JvmSettings.MAIL_MTA_PASSWORD.lookup().length()))); + return new PasswordAuthentication(JvmSettings.MAIL_MTA_USER.lookup(), JvmSettings.MAIL_MTA_PASSWORD.lookup()); + } + }; + } + + this.systemMailSession = Session.getInstance(getMailProperties(), authenticator); + } + return systemMailSession; + } + + Properties getMailProperties() { + Properties configuration = new Properties(); + + // See https://jakarta.ee/specifications/mail/2.1/apidocs/jakarta.mail/jakarta/mail/package-summary + configuration.put("mail.transport.protocol", "smtp"); + configuration.put("mail.debug", JvmSettings.MAIL_DEBUG.lookupOptional(Boolean.class).orElse(false).toString()); + // Only enable if your MTA properly supports UTF-8 mail addresses following RFC 6530/6531/6532. + // Before, we used a hack to put the raw UTF-8 mail address into the system. + // Now, make it proper, but make it possible to disable it - see also EMailValidator. + // Default = true from microprofile-config.properties as most MTAs these days support SMTPUTF8 extension + configuration.put("mail.mime.allowutf8", JvmSettings.MAIL_MTA_SUPPORT_UTF8.lookup(Boolean.class).toString()); + + // Map properties 1:1 to mail.smtp properties for the mail session. 
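For illustration, the mapping applied just below turns a setting such as dataverse.mail.mta.host into the session property mail.smtp.host, with booleans and integers stringified the same way. A hedged sketch of the resulting Properties object (host and port values are hypothetical):

import java.util.Properties;

class MailPropsSketch {
    static Properties example() {
        Properties p = new Properties();
        p.put("mail.transport.protocol", "smtp");
        p.put("mail.smtp.host", "smtp.example.org");  // hypothetical ...mta.host value
        p.put("mail.smtp.port", "587");               // hypothetical ...mta.port value
        p.put("mail.smtp.starttls.enable", "true");   // hypothetical ...mta.starttls.enable value
        return p;
    }
}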
+ smtpStringProps.forEach( + prop -> JvmSettings.MAIL_MTA_SETTING.lookupOptional(prop).ifPresent( + string -> configuration.put(PREFIX + prop, string))); + smtpBoolProps.forEach( + prop -> JvmSettings.MAIL_MTA_SETTING.lookupOptional(Boolean.class, prop).ifPresent( + bool -> configuration.put(PREFIX + prop, bool.toString()))); + smtpIntProps.forEach( + prop -> JvmSettings.MAIL_MTA_SETTING.lookupOptional(Integer.class, prop).ifPresent( + number -> configuration.put(PREFIX + prop, number.toString()))); + + logger.fine(() -> "Compiled properties:" + configuration.entrySet().stream() + .map(entry -> "\"" + entry.getKey() + "\": \"" + entry.getValue() + "\"") + .collect(Collectors.joining(",\n"))); + + return configuration; + } + + /** + * Determine if the session returned by {@link #getSession()} has been provided by the application server + * @return True if injected as resource from app server, false otherwise + * @deprecated This is supposed to be removed when {@link #appserverProvidedSession} is removed. + */ + @Deprecated(forRemoval = true, since = "6.2") + public boolean hasSessionFromAppServer() { + return this.appserverProvidedSession != null; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java index 0724e53700b..36c249de834 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java @@ -5,32 +5,14 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.UserNotification; import edu.harvard.iq.dataverse.branding.BrandingUtil; -import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.SystemEmail; import java.util.Arrays; import java.util.List; import java.util.logging.Logger; -import jakarta.mail.internet.AddressException; -import jakarta.mail.internet.InternetAddress; public class MailUtil { private static final Logger logger = Logger.getLogger(MailUtil.class.getCanonicalName()); - public static InternetAddress parseSystemAddress(String systemEmail) { - if (systemEmail != null) { - try { - InternetAddress parsedSystemEmail = new InternetAddress(systemEmail); - logger.fine("parsed system email: " + parsedSystemEmail); - return parsedSystemEmail; - } catch (AddressException ex) { - logger.info("Email will not be sent due to invalid value in " + SystemEmail + " setting: " + ex); - return null; - } - } - logger.fine("Email will not be sent because the " + SystemEmail + " setting is null."); - return null; - } - public static String getSubjectTextBasedOnNotification(UserNotification userNotification, Object objectOfNotification) { List rootDvNameAsList = Arrays.asList(BrandingUtil.getInstallationBrandName()); String datasetDisplayName = ""; @@ -53,7 +35,10 @@ public static String getSubjectTextBasedOnNotification(UserNotification userNoti case CREATEDV: return BundleUtil.getStringFromBundle("notification.email.create.dataverse.subject", rootDvNameAsList); case REQUESTFILEACCESS: - return BundleUtil.getStringFromBundle("notification.email.request.file.access.subject", Arrays.asList(rootDvNameAsList.get(0), datasetDisplayName)); + String userNameFirst = userNotification.getRequestor().getFirstName(); + String userNameLast = userNotification.getRequestor().getLastName(); + String userIdentifier = userNotification.getRequestor().getIdentifier(); + return BundleUtil.getStringFromBundle("notification.email.request.file.access.subject", Arrays.asList(rootDvNameAsList.get(0), userNameFirst, 
userNameLast, userIdentifier, datasetDisplayName)); case REQUESTEDFILEACCESS: return BundleUtil.getStringFromBundle("notification.email.requested.file.access.subject", Arrays.asList(rootDvNameAsList.get(0), datasetDisplayName)); case GRANTFILEACCESS: diff --git a/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java b/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java index 3af562882f3..9786fda4217 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java @@ -72,7 +72,7 @@ public class ShapefileHandler{ public final static List SHAPEFILE_MANDATORY_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj"); public final static String SHP_XML_EXTENSION = "shp.xml"; public final static String BLANK_EXTENSION = "__PLACEHOLDER-FOR-BLANK-EXTENSION__"; - public final static List SHAPEFILE_ALL_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj", "sbn", "sbx", "fbn", "fbx", "ain", "aih", "ixs", "mxs", "atx", "cpg", SHP_XML_EXTENSION); + public final static List SHAPEFILE_ALL_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj", "sbn", "sbx", "fbn", "fbx", "ain", "aih", "ixs", "mxs", "atx", "cpg", "qpj", "qmd", SHP_XML_EXTENSION); public boolean DEBUG = false; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java b/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java index 1826689b892..b6f8870aa2d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java @@ -19,6 +19,8 @@ Two configurable options allow changing the limit for the number of authors or d import jakarta.json.Json; import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObjectBuilder; +import org.apache.commons.validator.routines.UrlValidator; + import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -164,12 +166,11 @@ private List getAuthorURLs(boolean limit) { for (DatasetAuthor da : workingDatasetVersion.getDatasetAuthors()) { logger.fine(String.format("idtype: %s; idvalue: %s, affiliation: %s; identifierUrl: %s", da.getIdType(), da.getIdValue(), da.getAffiliation(), da.getIdentifierAsUrl())); - String authorURL = ""; - authorURL = getAuthorUrl(da); + String authorURL = getAuthorUrl(da); if (authorURL != null && !authorURL.isBlank()) { // return empty if number of visible author more than max allowed // >= since we're comparing before incrementing visibleAuthorCounter - if (visibleAuthorCounter >= maxAuthors) { + if (limit && visibleAuthorCounter >= maxAuthors) { authorURLs.clear(); break; } @@ -211,15 +212,22 @@ private String getAuthorsAsString(List datasetAuthorURLs) { * */ private String getAuthorUrl(DatasetAuthor da) { - String authorURL = ""; - //If no type and there's a value, assume it is a URL (is this reasonable?) 
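The getAuthorUrl() rewrite just below prefers the URL derived from the identifier type and value, and only falls back to the raw idValue when it is itself a valid http(s) URL. A quick illustration of the commons-validator behavior it relies on (the ORCID value is the public example identifier):

import org.apache.commons.validator.routines.UrlValidator;

class AuthorUrlSketch {
    public static void main(String[] args) {
        UrlValidator v = new UrlValidator(new String[] { "http", "https" });
        System.out.println(v.isValid("https://orcid.org/0000-0002-1825-0097")); // true
        System.out.println(v.isValid("0000-0002-1825-0097")); // false: a bare ID is not a URL
    }
}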
- //Otherise, get the URL using the type and value - if (da.getIdType() != null && !da.getIdType().isBlank() && da.getIdValue()!=null) { - authorURL = da.getIdValue(); - } else { - authorURL = da.getIdentifierAsUrl(); + + final String identifierAsUrl = da.getIdentifierAsUrl(); + // First, try to get URL using the type and value + if(identifierAsUrl != null) { + return identifierAsUrl; } - return authorURL; + + final String idValue = da.getIdValue(); + UrlValidator urlValidator = new UrlValidator(new String[]{"http", "https"}); + // Otherwise, try to use idValue as url if it's valid + if(urlValidator.isValid(idValue)) { + return idValue; + } + + // No url found + return null; } private JsonArrayBuilder getJsonAuthors(List datasetAuthorURLs) { diff --git a/src/main/java/edu/harvard/iq/dataverse/util/StringUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/StringUtil.java index 33c87563104..137ae21d793 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/StringUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/StringUtil.java @@ -2,9 +2,13 @@ import edu.harvard.iq.dataverse.authorization.providers.oauth2.OAuth2LoginBackingBean; import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.security.InvalidAlgorithmParameterException; import java.security.InvalidKeyException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.security.SecureRandom; +import java.security.spec.AlgorithmParameterSpec; import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; @@ -19,6 +23,7 @@ import javax.crypto.Cipher; import javax.crypto.IllegalBlockSizeException; import javax.crypto.NoSuchPaddingException; +import javax.crypto.spec.GCMParameterSpec; import javax.crypto.spec.SecretKeySpec; import org.apache.commons.lang3.StringUtils; @@ -117,6 +122,10 @@ public static List htmlArray2textArray(List htmlArray) { return cleanTextArray; } + private final static SecureRandom secureRandom = new SecureRandom(); + // 12 bytes is recommended by GCM spec + private final static int GCM_IV_LENGTH = 12; + /** * Generates an AES-encrypted version of the string. Resultant string is URL safe. * @param value The value to encrypt. @@ -124,19 +133,26 @@ public static List htmlArray2textArray(List htmlArray) { * @return encrypted string, URL-safe. 
*/ public static String encrypt(String value, String password ) { + byte[] baseBytes = value.getBytes(); try { - Cipher aes = Cipher.getInstance("AES"); + byte[] iv = new byte[GCM_IV_LENGTH]; //NEVER REUSE THIS IV WITH SAME KEY + secureRandom.nextBytes(iv); + Cipher aes = Cipher.getInstance("AES/GCM/NoPadding"); final SecretKeySpec secretKeySpec = generateKeyFromString(password); - aes.init(Cipher.ENCRYPT_MODE, secretKeySpec); + GCMParameterSpec parameterSpec = new GCMParameterSpec(128, iv); + aes.init(Cipher.ENCRYPT_MODE, secretKeySpec, parameterSpec); byte[] encrypted = aes.doFinal(baseBytes); - String base64ed = new String(Base64.getEncoder().encode(encrypted)); + ByteBuffer byteBuffer = ByteBuffer.allocate(iv.length + encrypted.length); + byteBuffer.put(iv); + byteBuffer.put(encrypted); + String base64ed = new String(Base64.getEncoder().encode(byteBuffer.array())); return base64ed.replaceAll("\\+", ".") .replaceAll("=", "-") .replaceAll("/", "_"); } catch ( InvalidKeyException | NoSuchAlgorithmException | BadPaddingException - | IllegalBlockSizeException | NoSuchPaddingException | UnsupportedEncodingException ex) { + | IllegalBlockSizeException | NoSuchPaddingException | UnsupportedEncodingException | InvalidAlgorithmParameterException ex) { Logger.getLogger(OAuth2LoginBackingBean.class.getName()).log(Level.SEVERE, null, ex); throw new RuntimeException(ex); } @@ -149,13 +165,15 @@ public static String decrypt(String value, String password ) { byte[] baseBytes = Base64.getDecoder().decode(base64); try { - Cipher aes = Cipher.getInstance("AES"); - aes.init( Cipher.DECRYPT_MODE, generateKeyFromString(password)); - byte[] decrypted = aes.doFinal(baseBytes); + Cipher aes = Cipher.getInstance("AES/GCM/NoPadding"); + //use first 12 bytes for iv + AlgorithmParameterSpec gcmIv = new GCMParameterSpec(128, baseBytes, 0, GCM_IV_LENGTH); + aes.init( Cipher.DECRYPT_MODE, generateKeyFromString(password),gcmIv); + byte[] decrypted = aes.doFinal(baseBytes,GCM_IV_LENGTH, baseBytes.length - GCM_IV_LENGTH); return new String(decrypted); } catch ( InvalidKeyException | NoSuchAlgorithmException | BadPaddingException - | IllegalBlockSizeException | NoSuchPaddingException | UnsupportedEncodingException ex) { + | IllegalBlockSizeException | NoSuchPaddingException | UnsupportedEncodingException | InvalidAlgorithmParameterException ex) { Logger.getLogger(OAuth2LoginBackingBean.class.getName()).log(Level.SEVERE, null, ex); throw new RuntimeException(ex); } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 079cbaa999d..f9801419e47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -752,6 +752,7 @@ public enum FileUploadMethods { * DCM stands for Data Capture Module. Right now it supports upload over * rsync+ssh but DCM may support additional methods in the future. */ + @Deprecated(forRemoval = true, since = "2024-07-07") RSYNC("dcm/rsync+ssh"), /** * Traditional Dataverse file handling, which tends to involve users @@ -809,6 +810,7 @@ public enum FileDownloadMethods { * RSAL stands for Repository Storage Abstraction Layer. Downloads don't * go through Glassfish. 
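Context for the StringUtil change above: Cipher.getInstance("AES") defaults to ECB mode with PKCS5 padding, which is why the rewrite moves to AES/GCM/NoPadding with a fresh 12-byte IV prepended to the ciphertext. A minimal sketch of that iv||ciphertext framing, assuming a caller-supplied 16-byte key (the real code derives the key from a password via generateKeyFromString(), not shown):

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.security.SecureRandom;
import javax.crypto.Cipher;
import javax.crypto.spec.GCMParameterSpec;
import javax.crypto.spec.SecretKeySpec;

class GcmSketch {
    private static final SecureRandom RNG = new SecureRandom();

    static byte[] encrypt(byte[] key16, String plaintext) throws Exception {
        byte[] iv = new byte[12]; // 96-bit IV; never reuse with the same key
        RNG.nextBytes(iv);
        Cipher aes = Cipher.getInstance("AES/GCM/NoPadding");
        aes.init(Cipher.ENCRYPT_MODE, new SecretKeySpec(key16, "AES"), new GCMParameterSpec(128, iv));
        byte[] ct = aes.doFinal(plaintext.getBytes(StandardCharsets.UTF_8));
        return ByteBuffer.allocate(iv.length + ct.length).put(iv).put(ct).array(); // iv || ciphertext
    }

    static String decrypt(byte[] key16, byte[] ivAndCt) throws Exception {
        Cipher aes = Cipher.getInstance("AES/GCM/NoPadding");
        aes.init(Cipher.DECRYPT_MODE, new SecretKeySpec(key16, "AES"),
                new GCMParameterSpec(128, ivAndCt, 0, 12)); // IV is the first 12 bytes
        byte[] pt = aes.doFinal(ivAndCt, 12, ivAndCt.length - 12);
        return new String(pt, StandardCharsets.UTF_8);
    }
}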
*/ + @Deprecated(forRemoval = true, since = "2024-07-07") RSYNC("rsal/rsync"), NATIVE("native/http"), GLOBUS("globus") @@ -862,6 +864,7 @@ public String toString() { */ public enum TransferProtocols { + @Deprecated(forRemoval = true, since = "2024-07-07") RSYNC("rsync"), /** * POSIX includes NFS. This is related to Key.LocalDataAccessPath in @@ -898,7 +901,8 @@ public boolean isPublicInstall(){ boolean saneDefault = false; return settingsService.isTrueForKey(SettingsServiceBean.Key.PublicInstall, saneDefault); } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncUpload(){ return getMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString(), true); } @@ -915,7 +919,8 @@ public boolean isWebloaderUpload(){ public boolean isHTTPUpload(){ return getMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString(), true); } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncOnly(){ String downloadMethods = settingsService.getValueForKey(SettingsServiceBean.Key.DownloadMethods); if(downloadMethods == null){ @@ -931,28 +936,24 @@ public boolean isRsyncOnly(){ return Arrays.asList(uploadMethods.toLowerCase().split("\\s*,\\s*")).size() == 1 && uploadMethods.toLowerCase().equals(SystemConfig.FileUploadMethods.RSYNC.toString()); } } - + + @Deprecated(forRemoval = true, since = "2024-07-07") public boolean isRsyncDownload() { return getMethodAvailable(SystemConfig.FileUploadMethods.RSYNC.toString(), false); } - + public boolean isHTTPDownload() { return getMethodAvailable(SystemConfig.FileUploadMethods.NATIVE.toString(), false); } public boolean isGlobusDownload() { - return getMethodAvailable(FileUploadMethods.GLOBUS.toString(), false); + return getMethodAvailable(FileDownloadMethods.GLOBUS.toString(), false); } public boolean isGlobusFileDownload() { return (isGlobusDownload() && settingsService.isTrueForKey(SettingsServiceBean.Key.GlobusSingleFileTransfer, false)); } - public List getGlobusStoresList() { - String globusStores = settingsService.getValueForKey(SettingsServiceBean.Key.GlobusStores, ""); - return Arrays.asList(globusStores.split("\\s*,\\s*")); - } - private Boolean getMethodAvailable(String method, boolean upload) { String methods = settingsService.getValueForKey( upload ? 
SettingsServiceBean.Key.UploadMethods : SettingsServiceBean.Key.DownloadMethods); @@ -971,25 +972,6 @@ public Integer getUploadMethodCount(){ return Arrays.asList(uploadMethods.toLowerCase().split("\\s*,\\s*")).size(); } } - public boolean isDataFilePIDSequentialDependent(){ - String doiIdentifierType = settingsService.getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "randomString"); - String doiDataFileFormat = settingsService.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT"); - if (doiIdentifierType.equals("storedProcGenerated") && doiDataFileFormat.equals("DEPENDENT")){ - return true; - } - return false; - } - - public int getPIDAsynchRegFileCount() { - String fileCount = settingsService.getValueForKey(SettingsServiceBean.Key.PIDAsynchRegFileCount, "10"); - int retVal = 10; - try { - retVal = Integer.parseInt(fileCount); - } catch (NumberFormatException e) { - //if no number in the setting we'll return 10 - } - return retVal; - } public boolean isAllowCustomTerms() { boolean safeDefaultIfKeyNotFound = true; @@ -1021,16 +1003,7 @@ public boolean isFilePIDsEnabledForCollection(Dataverse collection) { return thisCollection.getFilePIDsEnabled(); } - public boolean isIndependentHandleService() { - boolean safeDefaultIfKeyNotFound = false; - return settingsService.isTrueForKey(SettingsServiceBean.Key.IndependentHandleService, safeDefaultIfKeyNotFound); - - } - - public String getHandleAuthHandle() { - String handleAuthHandle = settingsService.getValueForKey(SettingsServiceBean.Key.HandleAuthHandle, null); - return handleAuthHandle; - } + public String getMDCLogPath() { String mDCLogPath = settingsService.getValueForKey(SettingsServiceBean.Key.MDCLogPath, null); @@ -1178,4 +1151,22 @@ public boolean isStorageQuotasEnforced() { public Long getTestStorageQuotaLimit() { return settingsService.getValueForKeyAsLong(SettingsServiceBean.Key.StorageQuotaSizeInBytes); } + /** + * Should we store tab-delimited files produced during ingest *with* the + * variable name header line included? + * @return boolean - defaults to false. 
+ */ + public boolean isStoringIngestedFilesWithHeaders() { + return settingsService.isTrueForKey(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders, false); + } + + /** + * RateLimitUtil will parse the json to create a List + */ + public String getRateLimitsJson() { + return settingsService.getValueForKey(SettingsServiceBean.Key.RateLimitingCapacityByTierAndAction, ""); + } + public String getRateLimitingDefaultCapacityTiers() { + return settingsService.getValueForKey(SettingsServiceBean.Key.RateLimitingDefaultCapacityTiers, ""); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java index c864823176e..90557a530c9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/URLTokenUtil.java @@ -1,11 +1,16 @@ package edu.harvard.iq.dataverse.util; import java.util.Arrays; +import java.util.Random; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; import jakarta.json.Json; +import jakarta.json.JsonArray; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonObject; +import jakarta.json.JsonObjectBuilder; import jakarta.json.JsonValue; import edu.harvard.iq.dataverse.DataFile; @@ -13,6 +18,8 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import static edu.harvard.iq.dataverse.api.ApiConstants.DS_VERSION_DRAFT; @@ -24,6 +31,13 @@ public class URLTokenUtil { protected final FileMetadata fileMetadata; protected ApiToken apiToken; protected String localeCode; + + + public static final String HTTP_METHOD="httpMethod"; + public static final String TIMEOUT="timeOut"; + public static final String SIGNED_URL="signedUrl"; + public static final String NAME="name"; + public static final String URL_TEMPLATE="urlTemplate"; /** * File level @@ -194,9 +208,63 @@ private String getTokenValue(String value) { throw new IllegalArgumentException("Cannot replace reserved word: " + value); } + public JsonObjectBuilder createPostBody(JsonObject params, JsonArray allowedApiCalls) { + JsonObjectBuilder bodyBuilder = Json.createObjectBuilder(); + bodyBuilder.add("queryParameters", params); + if (allowedApiCalls != null && !allowedApiCalls.isEmpty()) { + JsonArrayBuilder apisBuilder = Json.createArrayBuilder(); + allowedApiCalls.getValuesAs(JsonObject.class).forEach(((apiObj) -> { + logger.fine(JsonUtil.prettyPrint(apiObj)); + String name = apiObj.getJsonString(NAME).getString(); + String httpmethod = apiObj.getJsonString(HTTP_METHOD).getString(); + int timeout = apiObj.getInt(TIMEOUT); + String urlTemplate = apiObj.getJsonString(URL_TEMPLATE).getString(); + logger.fine("URL Template: " + urlTemplate); + urlTemplate = SystemConfig.getDataverseSiteUrlStatic() + urlTemplate; + String apiPath = replaceTokensWithValues(urlTemplate); + logger.fine("URL WithTokens: " + apiPath); + String url = apiPath; + // Sign if apiToken exists, otherwise send unsigned URL (i.e. 
for guest users) + ApiToken apiToken = getApiToken(); + if (apiToken != null) { + url = UrlSignerUtil.signUrl(apiPath, timeout, apiToken.getAuthenticatedUser().getUserIdentifier(), + httpmethod, JvmSettings.API_SIGNING_SECRET.lookupOptional().orElse("") + + getApiToken().getTokenString()); + } + logger.fine("Signed URL: " + url); + apisBuilder.add(Json.createObjectBuilder().add(NAME, name).add(HTTP_METHOD, httpmethod) + .add(SIGNED_URL, url).add(TIMEOUT, timeout)); + })); + bodyBuilder.add("signedUrls", apisBuilder); + } + return bodyBuilder; + } + + public JsonObject getParams(JsonObject toolParameters) { + //ToDo - why an array of object each with a single key/value pair instead of one object? + JsonArray queryParams = toolParameters.getJsonArray("queryParameters"); + + // ToDo return json and print later + JsonObjectBuilder paramsBuilder = Json.createObjectBuilder(); + if (!(queryParams == null) && !queryParams.isEmpty()) { + queryParams.getValuesAs(JsonObject.class).forEach((queryParam) -> { + queryParam.keySet().forEach((key) -> { + String value = queryParam.getString(key); + JsonValue param = getParam(value); + if (param != null) { + paramsBuilder.add(key, param); + } + }); + }); + } + return paramsBuilder.build(); + } + public static String getScriptForUrl(String url) { String msg = BundleUtil.getStringFromBundle("externaltools.enable.browser.popups"); - String script = "const newWin = window.open('" + url + "', target='_blank'); if (!newWin || newWin.closed || typeof newWin.closed == \"undefined\") {alert(\"" + msg + "\");}"; + String newWin = "newWin" + (new Random()).nextInt(1000000000); + //Always use a unique identifier so that more than one script can run (or one can be rerun) without conflicts + String script = String.format("const %1$s = window.open('" + url + "', target='_blank'); if (!%1$s || %1$s.closed || typeof %1$s.closed == \"undefined\") {alert(\"" + msg + "\");}", newWin); return script; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java index baba1a0cb43..b7c44014b80 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/BagGenerator.java @@ -74,7 +74,9 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFile.ChecksumType; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; +import java.util.Optional; public class BagGenerator { @@ -822,17 +824,20 @@ private String generateInfoFile() { logger.warning("No contact info available for BagIt Info file"); } - info.append("Source-Organization: " + BundleUtil.getStringFromBundle("bagit.sourceOrganization")); + String orgName = JvmSettings.BAGIT_SOURCE_ORG_NAME.lookupOptional(String.class).orElse("Dataverse Installation ()"); + String orgAddress = JvmSettings.BAGIT_SOURCEORG_ADDRESS.lookupOptional(String.class).orElse(""); + String orgEmail = JvmSettings.BAGIT_SOURCEORG_EMAIL.lookupOptional(String.class).orElse(""); + + info.append("Source-Organization: " + orgName); // ToDo - make configurable info.append(CRLF); - info.append("Organization-Address: " + WordUtils.wrap( - BundleUtil.getStringFromBundle("bagit.sourceOrganizationAddress"), 78, CRLF + " ", true)); + info.append("Organization-Address: " + WordUtils.wrap(orgAddress, 78, CRLF + " ", true)); + info.append(CRLF); // Not a BagIt standard name - 
info.append( - "Organization-Email: " + BundleUtil.getStringFromBundle("bagit.sourceOrganizationEmail")); + info.append("Organization-Email: " + orgEmail); info.append(CRLF); info.append("External-Description: "); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java index aa653a6e360..84bc7834ab9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/bagit/OREMap.java @@ -1,19 +1,7 @@ package edu.harvard.iq.dataverse.util.bagit; -import edu.harvard.iq.dataverse.DataFile; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; -import edu.harvard.iq.dataverse.DatasetFieldConstant; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.DatasetVersion.VersionState; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DvObjectContainer; -import edu.harvard.iq.dataverse.Embargo; -import edu.harvard.iq.dataverse.FileMetadata; -import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.export.OAI_OREExporter; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -236,6 +224,17 @@ public JsonObjectBuilder getOREMapBuilder(boolean aggregationOnly) { } aggRes.add(JsonLDTerm.DVCore("embargoed").getLabel(), embargoObject); } + Retention retention = df.getRetention(); + if(retention!=null) { + String date = retention.getFormattedDateUnavailable(); + String reason= retention.getReason(); + JsonObjectBuilder retentionObject = Json.createObjectBuilder(); + retentionObject.add(JsonLDTerm.DVCore("dateUnavailable").getLabel(), date); + if(reason!=null) { + retentionObject.add(JsonLDTerm.DVCore("reason").getLabel(), reason); + } + aggRes.add(JsonLDTerm.DVCore("retained").getLabel(), retentionObject); + } addIfNotNull(aggRes, JsonLDTerm.directoryLabel, fmd.getDirectoryLabel()); addIfNotNull(aggRes, JsonLDTerm.schemaOrg("version"), fmd.getVersion()); addIfNotNull(aggRes, JsonLDTerm.datasetVersionId, fmd.getDatasetVersion().getId()); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBean.java b/src/main/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBean.java new file mode 100644 index 00000000000..36b2b35b48f --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBean.java @@ -0,0 +1,60 @@ +package edu.harvard.iq.dataverse.util.cache; + +import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.engine.command.Command; +import edu.harvard.iq.dataverse.util.SystemConfig; +import jakarta.annotation.PostConstruct; +import jakarta.ejb.EJB; +import jakarta.ejb.Singleton; +import jakarta.ejb.Startup; +import jakarta.inject.Inject; + +import javax.cache.Cache; +import javax.cache.CacheManager; +import javax.cache.configuration.CompleteConfiguration; +import javax.cache.configuration.MutableConfiguration; +import javax.cache.spi.CachingProvider; +import java.util.logging.Logger; + +@Singleton +@Startup +public class CacheFactoryBean implements java.io.Serializable { + private static final Logger logger = Logger.getLogger(CacheFactoryBean.class.getCanonicalName()); + // Retrieved from Hazelcast, implements 
ConcurrentMap and is threadsafe + Cache rateLimitCache; + @EJB + SystemConfig systemConfig; + @Inject + CacheManager manager; + @Inject + CachingProvider provider; + public final static String RATE_LIMIT_CACHE = "rateLimitCache"; + + @PostConstruct + public void init() { + rateLimitCache = manager.getCache(RATE_LIMIT_CACHE); + if (rateLimitCache == null) { + CompleteConfiguration config = + new MutableConfiguration() + .setTypes( String.class, String.class ); + rateLimitCache = manager.createCache(RATE_LIMIT_CACHE, config); + } + } + + /** + * Check if user can make this call or if they are rate limited + * @param user + * @param command + * @return true if user is superuser or rate not limited + */ + public boolean checkRate(User user, Command command) { + final String action = command.getClass().getSimpleName(); + int capacity = RateLimitUtil.getCapacity(systemConfig, user, action); + if (capacity == RateLimitUtil.NO_LIMIT) { + return true; + } else { + String cacheKey = RateLimitUtil.generateCacheKey(user, action); + return (!RateLimitUtil.rateLimited(rateLimitCache, cacheKey, capacity)); + } + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/cache/RateLimitSetting.java b/src/main/java/edu/harvard/iq/dataverse/util/cache/RateLimitSetting.java new file mode 100644 index 00000000000..54da5a46670 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/cache/RateLimitSetting.java @@ -0,0 +1,40 @@ +package edu.harvard.iq.dataverse.util.cache; + +import java.util.ArrayList; +import java.util.List; + +public class RateLimitSetting { + + private int tier; + private int limitPerHour = RateLimitUtil.NO_LIMIT; + private List actions = new ArrayList<>(); + + private int defaultLimitPerHour; + + public RateLimitSetting() {} + + public void setTier(int tier) { + this.tier = tier; + } + public int getTier() { + return this.tier; + } + public void setLimitPerHour(int limitPerHour) { + this.limitPerHour = limitPerHour; + } + public int getLimitPerHour() { + return this.limitPerHour; + } + public void setActions(List actions) { + this.actions = actions; + } + public List getActions() { + return this.actions; + } + public void setDefaultLimit(int defaultLimitPerHour) { + this.defaultLimitPerHour = defaultLimitPerHour; + } + public int getDefaultLimitPerHour() { + return this.defaultLimitPerHour; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/cache/RateLimitUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/cache/RateLimitUtil.java new file mode 100644 index 00000000000..b566cd42fe1 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/cache/RateLimitUtil.java @@ -0,0 +1,134 @@ +package edu.harvard.iq.dataverse.util.cache; + +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.GuestUser; +import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.util.SystemConfig; +import jakarta.json.bind.Jsonb; +import jakarta.json.bind.JsonbBuilder; +import jakarta.json.bind.JsonbException; + +import javax.cache.Cache; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.logging.Logger; + +import static java.lang.Math.max; +import static java.lang.Math.min; + +public class RateLimitUtil { + private static final Logger logger = Logger.getLogger(RateLimitUtil.class.getCanonicalName()); + static final 
List<RateLimitSetting> rateLimits = new CopyOnWriteArrayList<>(); + static final Map<String, Integer> rateLimitMap = new ConcurrentHashMap<>(); + public static final int NO_LIMIT = -1; + + static String generateCacheKey(final User user, final String action) { + return (user != null ? user.getIdentifier() : GuestUser.get().getIdentifier()) + + (action != null ? ":" + action : ""); + } + static int getCapacity(SystemConfig systemConfig, User user, String action) { + if (user != null && user.isSuperuser()) { + return NO_LIMIT; + } + // get the capacity, i.e. calls per hour, from config + return (user instanceof AuthenticatedUser authUser) ? + getCapacityByTierAndAction(systemConfig, authUser.getRateLimitTier(), action) : + getCapacityByTierAndAction(systemConfig, 0, action); + } + static boolean rateLimited(final Cache rateLimitCache, final String key, int capacityPerHour) { + if (capacityPerHour == NO_LIMIT) { + return false; + } + long currentTime = System.currentTimeMillis() / 60000L; // convert to minutes + double tokensPerMinute = (capacityPerHour / 60.0); + // Get the last time this bucket was added to + final String keyLastUpdate = String.format("%s:last_update",key); + long lastUpdate = longFromKey(rateLimitCache, keyLastUpdate); + long deltaTime = currentTime - lastUpdate; + // Get the current number of tokens in the bucket + long tokens = longFromKey(rateLimitCache, key); + long tokensToAdd = (long) (deltaTime * tokensPerMinute); + if (tokensToAdd > 0) { // Don't update timestamp if we aren't adding any tokens to the bucket + tokens = min(capacityPerHour, tokens + tokensToAdd); + rateLimitCache.put(keyLastUpdate, String.valueOf(currentTime)); + } + // Update with any added tokens and decrement 1 token for this call if not rate limited (0 tokens) + rateLimitCache.put(key, String.valueOf(max(0, tokens-1))); + return tokens < 1; + } + + static int getCapacityByTierAndAction(SystemConfig systemConfig, Integer tier, String action) { + if (rateLimits.isEmpty()) { + init(systemConfig); + } + + if (rateLimitMap.containsKey(getMapKey(tier, action))) { + return rateLimitMap.get(getMapKey(tier,action)); + } else if (rateLimitMap.containsKey(getMapKey(tier))) { + return rateLimitMap.get(getMapKey(tier)); + } else { + return getCapacityByTier(systemConfig, tier); + } + } + static int getCapacityByTier(SystemConfig systemConfig, int tier) { + int value = NO_LIMIT; + String csvString = systemConfig.getRateLimitingDefaultCapacityTiers(); + try { + if (!csvString.isEmpty()) { + int[] values = Arrays.stream(csvString.split(",")).mapToInt(Integer::parseInt).toArray(); + if (tier < values.length) { + value = values[tier]; + } + } + } catch (NumberFormatException nfe) { + logger.warning(nfe.getMessage()); + } + return value; + } + static void init(SystemConfig systemConfig) { + getRateLimitsFromJson(systemConfig); + /* Convert the List of Rate Limit Settings containing a list of Actions to a fast lookup Map where the key is: + for default if no action defined: "{tier}:" and the value is the default limit for the tier + for each action: "{tier}:{action}" and the value is the limit defined in the setting + */ + rateLimitMap.clear(); + rateLimits.forEach(r -> { + r.setDefaultLimit(getCapacityByTier(systemConfig, r.getTier())); + rateLimitMap.put(getMapKey(r.getTier()), r.getDefaultLimitPerHour()); + r.getActions().forEach(a -> rateLimitMap.put(getMapKey(r.getTier(), a), r.getLimitPerHour())); + }); + } + + @SuppressWarnings("java:S2133") // <- To enable casting to generic in JSON-B we need a class instance, false positive + static void getRateLimitsFromJson(SystemConfig systemConfig) { + String setting = systemConfig.getRateLimitsJson(); + rateLimits.clear(); + if (!setting.isEmpty()) { + try (Jsonb jsonb = JsonbBuilder.create()) { + rateLimits.addAll(jsonb.fromJson(setting, + new ArrayList<RateLimitSetting>() {}.getClass().getGenericSuperclass())); + } catch (JsonbException e) { + logger.warning("Unable to parse Rate Limit Json: " + e.getLocalizedMessage() + " Json:(" + setting + ")"); + rateLimits.add(new RateLimitSetting()); // add a default entry to prevent re-initialization + // Note: Usually using Exception in a catch block is an antipattern and should be avoided. + // As the JSON-B interface does not specify a non-generic type, we have to use this. + } catch (Exception e) { + logger.warning("Could not close JSON-B reader"); + } + } + } + static String getMapKey(int tier) { + return getMapKey(tier, null); + } + static String getMapKey(int tier, String action) { + return tier + ":" + (action != null ? action : ""); + } + static long longFromKey(Cache cache, String key) { + Object l = cache.get(key); + return l != null ? Long.parseLong(String.valueOf(l)) : 0L; + } +}
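Worked example of the token bucket that rateLimited() above implements: with capacityPerHour = 120 the bucket refills at 2 tokens per minute, a burst of 120 calls drains it, and further calls in the same minute are rejected until tokens accrue again. A standalone sketch of the same arithmetic, using a plain Map in place of the JCache (names simplified, not the Dataverse types):

import java.util.HashMap;
import java.util.Map;

class TokenBucketSketch {
    final Map<String, Long> cache = new HashMap<>();

    boolean rateLimited(String key, int capacityPerHour, long nowMinutes) {
        long lastUpdate = cache.getOrDefault(key + ":last_update", 0L);
        long tokens = cache.getOrDefault(key, 0L);
        long tokensToAdd = (long) ((nowMinutes - lastUpdate) * (capacityPerHour / 60.0));
        if (tokensToAdd > 0) { // only advance the timestamp when tokens are actually added
            tokens = Math.min(capacityPerHour, tokens + tokensToAdd);
            cache.put(key + ":last_update", nowMinutes);
        }
        cache.put(key, Math.max(0, tokens - 1)); // spend one token for this call
        return tokens < 1;
    }
}

diff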
--git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 984c607aac7..addccc93fe0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -38,7 +38,6 @@ import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -69,7 +68,8 @@ public class JsonParser { MetadataBlockServiceBean blockService; SettingsServiceBean settingsService; LicenseServiceBean licenseService; - HarvestingClient harvestingClient = null; + HarvestingClient harvestingClient = null; + boolean allowHarvestingMissingCVV = false; /** * if lenient, we will accept alternate spellings for controlled vocabulary values @@ -93,6 +93,7 @@ public JsonParser(DatasetFieldServiceBean datasetFieldSvc, MetadataBlockServiceB this.settingsService = settingsService; this.licenseService = licenseService; this.harvestingClient = harvestingClient; + this.allowHarvestingMissingCVV = harvestingClient != null && harvestingClient.getAllowHarvestingMissingCVV(); } public JsonParser() { @@ -318,8 +319,8 @@ public DatasetVersion parseDatasetVersion(JsonObject obj) throws JsonParseExcept public Dataset parseDataset(JsonObject obj) throws JsonParseException { Dataset dataset = new Dataset(); - dataset.setAuthority(obj.getString("authority", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Authority) : obj.getString("authority")); - dataset.setProtocol(obj.getString("protocol", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Protocol) : obj.getString("protocol")); + dataset.setAuthority(obj.getString("authority", null)); + dataset.setProtocol(obj.getString("protocol", null)); dataset.setIdentifier(obj.getString("identifier",null)); String mdl = obj.getString("metadataLanguage",null); if(mdl==null || settingsService.getBaseMetadataLanguageMap(new HashMap(), true).containsKey(mdl)) { @@ -737,39 +738,23 @@ public DatasetField parseField(JsonObject json, Boolean testType) throws JsonPar ret.setDatasetFieldType(type); - - if (type.isCompound()) { - List vals = parseCompoundValue(type, json, testType); - for (DatasetFieldCompoundValue dsfcv : vals) { - dsfcv.setParentDatasetField(ret); - } - ret.setDatasetFieldCompoundValues(vals); + if (type.isCompound()) { + parseCompoundValue(ret, type, json, testType); } else if (type.isControlledVocabulary()) { - List vals = parseControlledVocabularyValue(type, json); - for (ControlledVocabularyValue cvv : vals) { - cvv.setDatasetFieldType(type); - } - ret.setControlledVocabularyValues(vals); - + parseControlledVocabularyValue(ret, type, json); } else { - // primitive - - List values = parsePrimitiveValue(type, json); - for (DatasetFieldValue val : values) { - val.setDatasetField(ret); - } - ret.setDatasetFieldValues(values); - } + parsePrimitiveValue(ret, type, json); + } return ret; } - public List parseCompoundValue(DatasetFieldType compoundType, JsonObject json) throws JsonParseException { - return parseCompoundValue(compoundType, json, true); + public void parseCompoundValue(DatasetField dsf, DatasetFieldType compoundType, JsonObject json) throws JsonParseException { + parseCompoundValue(dsf, compoundType, json, true); } - public List parseCompoundValue(DatasetFieldType compoundType, JsonObject json, Boolean testType) throws JsonParseException { + public void 
parseCompoundValue(DatasetField dsf, DatasetFieldType compoundType, JsonObject json, Boolean testType) throws JsonParseException { List vocabExceptions = new ArrayList<>(); List vals = new LinkedList<>(); if (compoundType.isAllowMultiples()) { @@ -836,18 +821,17 @@ public List parseCompoundValue(DatasetFieldType compo if (!vocabExceptions.isEmpty()) { throw new CompoundVocabularyException( "Invalid controlled vocabulary in compound field ", vocabExceptions, vals); } - return vals; + + for (DatasetFieldCompoundValue dsfcv : vals) { + dsfcv.setParentDatasetField(dsf); + } + dsf.setDatasetFieldCompoundValues(vals); } - public List parsePrimitiveValue(DatasetFieldType dft , JsonObject json) throws JsonParseException { + public void parsePrimitiveValue(DatasetField dsf, DatasetFieldType dft , JsonObject json) throws JsonParseException { Map cvocMap = datasetFieldSvc.getCVocConf(true); - boolean extVocab=false; - if(cvocMap.containsKey(dft.getId())) { - extVocab=true; - } - - + boolean extVocab = cvocMap.containsKey(dft.getId()); List vals = new LinkedList<>(); if (dft.isAllowMultiples()) { try { @@ -856,14 +840,13 @@ public List parsePrimitiveValue(DatasetFieldType dft , JsonOb throw new JsonParseException("Invalid values submitted for " + dft.getName() + ". It should be an array of values."); } for (JsonString val : json.getJsonArray("value").getValuesAs(JsonString.class)) { - DatasetFieldValue datasetFieldValue = new DatasetFieldValue(); + DatasetFieldValue datasetFieldValue = new DatasetFieldValue(dsf); datasetFieldValue.setDisplayOrder(vals.size() - 1); datasetFieldValue.setValue(val.getString().trim()); if(extVocab) { if(!datasetFieldSvc.isValidCVocValue(dft, datasetFieldValue.getValue())) { throw new JsonParseException("Invalid values submitted for " + dft.getName() + " which is limited to specific vocabularies."); } - datasetFieldSvc.registerExternalTerm(cvocMap.get(dft.getId()), datasetFieldValue.getValue()); } vals.add(datasetFieldValue); } @@ -875,16 +858,16 @@ public List parsePrimitiveValue(DatasetFieldType dft , JsonOb } DatasetFieldValue datasetFieldValue = new DatasetFieldValue(); datasetFieldValue.setValue(json.getString("value", "").trim()); + datasetFieldValue.setDatasetField(dsf); if(extVocab) { if(!datasetFieldSvc.isValidCVocValue(dft, datasetFieldValue.getValue())) { throw new JsonParseException("Invalid values submitted for " + dft.getName() + " which is limited to specific vocabularies."); } - datasetFieldSvc.registerExternalTerm(cvocMap.get(dft.getId()), datasetFieldValue.getValue()); } vals.add(datasetFieldValue); } - return vals; + dsf.setDatasetFieldValues(vals); } public Workflow parseWorkflow(JsonObject json) throws JsonParseException { @@ -929,31 +912,35 @@ private String jsonValueToString(JsonValue jv) { default: return jv.toString(); } } - - public List parseControlledVocabularyValue(DatasetFieldType cvvType, JsonObject json) throws JsonParseException { + + public void parseControlledVocabularyValue(DatasetField dsf, DatasetFieldType cvvType, JsonObject json) throws JsonParseException { + List vals = new LinkedList<>(); try { if (cvvType.isAllowMultiples()) { try { json.getJsonArray("value").getValuesAs(JsonObject.class); } catch (ClassCastException cce) { throw new JsonParseException("Invalid values submitted for " + cvvType.getName() + ". 
It should be an array of values."); - } - List vals = new LinkedList<>(); + } for (JsonString strVal : json.getJsonArray("value").getValuesAs(JsonString.class)) { String strValue = strVal.getString(); ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(cvvType, strValue, lenient); if (cvv == null) { - throw new ControlledVocabularyException("Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'", cvvType, strValue); - } - // Only add value to the list if it is not a duplicate - if (strValue.equals("Other")) { - System.out.println("vals = " + vals + ", contains: " + vals.contains(cvv)); + if (allowHarvestingMissingCVV) { + // we need to process these as primitive values + logger.warning("Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'. Processing as primitive per setting override."); + parsePrimitiveValue(dsf, cvvType, json); + return; + } else { + throw new ControlledVocabularyException("Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'", cvvType, strValue); + } } + cvv.setDatasetFieldType(cvvType); + // Only add value to the list if it is not a duplicate if (!vals.contains(cvv)) { vals.add(cvv); } } - return vals; } else { try { @@ -964,13 +951,23 @@ public List parseControlledVocabularyValue(DatasetFie String strValue = json.getString("value", ""); ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(cvvType, strValue, lenient); if (cvv == null) { - throw new ControlledVocabularyException("Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'", cvvType, strValue); + if (allowHarvestingMissingCVV) { + // we need to process this as a primitive value + logger.warning(">>>> Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'. Processing as primitive per setting override."); + parsePrimitiveValue(dsf, cvvType , json); + return; + } else { + throw new ControlledVocabularyException("Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'", cvvType, strValue); + } } - return Collections.singletonList(cvv); + cvv.setDatasetFieldType(cvvType); + vals.add(cvv); } } catch (ClassCastException cce) { throw new JsonParseException("Invalid values submitted for " + cvvType.getName()); } + + dsf.setControlledVocabularyValues(vals); } Date parseDate(String str) throws ParseException { @@ -1001,6 +998,7 @@ public String parseHarvestingClient(JsonObject obj, HarvestingClient harvestingC harvestingClient.setMetadataPrefix(obj.getString("metadataFormat",null)); harvestingClient.setHarvestingSet(obj.getString("set",null)); harvestingClient.setCustomHttpHeaders(obj.getString("customHeaders", null)); + harvestingClient.setAllowHarvestingMissingCVV(obj.getBoolean("allowHarvestingMissingCVV", false)); return dataverseAlias; } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index 65fe749e554..c72dfc1d127 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -55,6 +55,7 @@ import jakarta.ejb.Singleton; import jakarta.json.JsonArray; import jakarta.json.JsonObject; +import java.math.BigDecimal; /** * Convert objects to Json. 
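The parseControlledVocabularyValue() changes above add a harvesting escape hatch: when the client sets allowHarvestingMissingCVV, an unknown vocabulary value is stored as a plain primitive value instead of aborting with a ControlledVocabularyException. A simplified stand-in (not the Dataverse types) showing that control flow:

class CvvFallbackSketch {
    interface Vocabulary { String match(String value); } // returns null if the value is unknown

    static String parse(Vocabulary vocab, String value, boolean allowMissingCVV) {
        String known = vocab.match(value);
        if (known != null) {
            return known; // matched a controlled term, keep it
        }
        if (allowMissingCVV) {
            return value; // degrade to a primitive value so the harvest continues
        }
        throw new IllegalArgumentException("Value '" + value + "' does not exist in the vocabulary");
    }
}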
@@ -257,11 +258,11 @@ public static JsonObjectBuilder json(Workflow wf){
     }
 
     public static JsonObjectBuilder json(Dataverse dv) {
-        return json(dv, false);
+        return json(dv, false, false);
     }
 
     //TODO: Once we upgrade to Java EE 8 we can remove objects from the builder, and this email removal can be done in a better place.
-    public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail) {
+    public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail, Boolean returnOwners) {
         JsonObjectBuilder bld = jsonObjectBuilder()
                 .add("id", dv.getId())
                 .add("alias", dv.getAlias())
@@ -270,7 +271,9 @@ public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail) {
         if(!hideEmail) {
             bld.add("dataverseContacts", JsonPrinter.json(dv.getDataverseContacts()));
         }
-
+        if (returnOwners){
+            bld.add("isPartOf", getOwnersFromDvObject(dv));
+        }
         bld.add("permissionRoot", dv.isPermissionRoot())
                 .add("description", dv.getDescription())
                 .add("dataverseType", dv.getDataverseType().name());
@@ -289,6 +292,12 @@ public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail) {
         if (dv.getFilePIDsEnabled() != null) {
             bld.add("filePIDsEnabled", dv.getFilePIDsEnabled());
         }
+        bld.add("isReleased", dv.isReleased());
+
+        List inputLevels = dv.getDataverseFieldTypeInputLevels();
+        if(!inputLevels.isEmpty()) {
+            bld.add("inputLevels", JsonPrinter.jsonDataverseFieldTypeInputLevels(inputLevels));
+        }
 
         return bld;
     }
@@ -303,7 +312,57 @@ public static JsonArrayBuilder json(List dataverseContacts) {
         }
         return jsonArrayOfContacts;
     }
+
+    public static JsonObjectBuilder getOwnersFromDvObject(DvObject dvObject){
+        return getOwnersFromDvObject(dvObject, null);
+    }
+
+    public static JsonObjectBuilder getOwnersFromDvObject(DvObject dvObject, DatasetVersion dsv) {
+        List ownerList = new ArrayList();
+        dvObject = dvObject.getOwner(); // We're going to ignore the object itself
+        //Get "root" to top of list
+        while (dvObject != null) {
+            ownerList.add(0, dvObject);
+            dvObject = dvObject.getOwner();
+        }
+        //then work "inside out"
+        JsonObjectBuilder saved = null;
+        for (DvObject dvo : ownerList) {
+            saved = addEmbeddedOwnerObject(dvo, saved, dsv);
+        }
+        return saved;
+    }
+
+    private static JsonObjectBuilder addEmbeddedOwnerObject(DvObject dvo, JsonObjectBuilder isPartOf, DatasetVersion dsv ) {
+        JsonObjectBuilder ownerObject = jsonObjectBuilder();
+
+        if (dvo.isInstanceofDataverse()) {
+            ownerObject.add("type", "DATAVERSE");
+            Dataverse in = (Dataverse) dvo;
+            ownerObject.add("identifier", in.getAlias());
+        }
+
+        if (dvo.isInstanceofDataset()) {
+            ownerObject.add("type", "DATASET");
+            if (dvo.getGlobalId() != null) {
+                ownerObject.add("persistentIdentifier", dvo.getGlobalId().asString());
+            }
+            ownerObject.add("identifier", dvo.getId());
+            String versionString = dsv == null ? "" : dsv.getFriendlyVersionNumber();
+            if (!versionString.isEmpty()){
+                ownerObject.add("version", versionString);
+            }
+        }
+
+        ownerObject.add("displayName", dvo.getDisplayName());
+
+        if (isPartOf != null) {
+            ownerObject.add("isPartOf", isPartOf);
+        }
+
+        return ownerObject;
+    }
+
     public static JsonObjectBuilder json( DataverseTheme theme ) {
         final NullSafeJsonBuilder baseObject = jsonObjectBuilder()
                 .add("id", theme.getId() )
@@ -326,8 +385,12 @@ public static JsonObjectBuilder json(BuiltinUser user) {
                 .add("id", user.getId())
                 .add("userName", user.getUserName());
     }
+
+    public static JsonObjectBuilder json(Dataset ds){
+        return json(ds, false);
+    }
 
-    public static JsonObjectBuilder json(Dataset ds) {
+    public static JsonObjectBuilder json(Dataset ds, Boolean returnOwners) {
         JsonObjectBuilder bld = jsonObjectBuilder()
                 .add("id", ds.getId())
                 .add("identifier", ds.getIdentifier())
@@ -340,6 +403,9 @@ public static JsonObjectBuilder json(Dataset ds) {
         if (DvObjectContainer.isMetadataLanguageSet(ds.getMetadataLanguage())) {
             bld.add("metadataLanguage", ds.getMetadataLanguage());
         }
+        if (returnOwners){
+            bld.add("isPartOf", getOwnersFromDvObject(ds));
+        }
         return bld;
     }
 
@@ -352,30 +418,33 @@ public static JsonObjectBuilder json(FileDetailsHolder ds) {
     }
 
     public static JsonObjectBuilder json(DatasetVersion dsv, boolean includeFiles) {
-        return json(dsv, null, includeFiles);
+        return json(dsv, null, includeFiles, false);
     }
 
-    public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList, boolean includeFiles) {
-        /* return json(dsv, null, includeFiles, null);
-    }
-    public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList, boolean includeFiles, Long numberOfFiles) {*/
+    public static JsonObjectBuilder json(DatasetVersion dsv, List anonymizedFieldTypeNamesList,
+                                         boolean includeFiles, boolean returnOwners) {
         Dataset dataset = dsv.getDataset();
         JsonObjectBuilder bld = jsonObjectBuilder()
                 .add("id", dsv.getId()).add("datasetId", dataset.getId())
                 .add("datasetPersistentId", dataset.getGlobalId().asString())
                 .add("storageIdentifier", dataset.getStorageIdentifier())
-                .add("versionNumber", dsv.getVersionNumber()).add("versionMinorNumber", dsv.getMinorVersionNumber())
-                .add("versionState", dsv.getVersionState().name()).add("versionNote", dsv.getVersionNote())
-                .add("archiveNote", dsv.getArchiveNote()).add("deaccessionLink", dsv.getDeaccessionLink())
-                .add("distributionDate", dsv.getDistributionDate()).add("productionDate", dsv.getProductionDate())
+                .add("versionNumber", dsv.getVersionNumber())
+                .add("versionMinorNumber", dsv.getMinorVersionNumber())
+                .add("versionState", dsv.getVersionState().name())
+                .add("latestVersionPublishingState", dataset.getLatestVersion().getVersionState().name())
+                .add("versionNote", dsv.getVersionNote())
+                .add("archiveNote", dsv.getArchiveNote())
+                .add("deaccessionLink", dsv.getDeaccessionLink())
+                .add("distributionDate", dsv.getDistributionDate())
+                .add("productionDate", dsv.getProductionDate())
                 .add("UNF", dsv.getUNF()).add("archiveTime", format(dsv.getArchiveTime()))
-                .add("lastUpdateTime", format(dsv.getLastUpdateTime())).add("releaseTime", format(dsv.getReleaseTime()))
+                .add("lastUpdateTime", format(dsv.getLastUpdateTime()))
+                .add("releaseTime", format(dsv.getReleaseTime()))
                 .add("createTime", format(dsv.getCreateTime()))
                 .add("alternativePersistentId", dataset.getAlternativePersistentIdentifier())
                 .add("publicationDate", dataset.getPublicationDateFormattedYYYYMMDD())
                 .add("citationDate", dataset.getCitationDateFormattedYYYYMMDD());
-        //.add("numberOfFiles", numberOfFiles);
-
+
         License license = DatasetUtil.getLicense(dsv);
         if (license != null) {
             bld.add("license", jsonLicense(dsv));
@@ -402,7 +471,10 @@ public static JsonObjectBuilder json(DatasetVersion dsv, List anonymized
         bld.add("metadataBlocks", (anonymizedFieldTypeNamesList != null) ?
                 jsonByBlocks(dsv.getDatasetFields(), anonymizedFieldTypeNamesList)
                 : jsonByBlocks(dsv.getDatasetFields())
-        );
+        );
+        if(returnOwners){
+            bld.add("isPartOf", getOwnersFromDvObject(dataset));
+        }
         if (includeFiles) {
             bld.add("files", jsonFileMetadatas(dsv.getFileMetadatas()));
         }
@@ -521,6 +593,18 @@ public static JsonObjectBuilder json(MetadataBlock block, List fie
         return blockBld;
     }
 
+    public static JsonArrayBuilder json(List metadataBlocks, boolean returnDatasetFieldTypes, boolean printOnlyDisplayedOnCreateDatasetFieldTypes) {
+        return json(metadataBlocks, returnDatasetFieldTypes, printOnlyDisplayedOnCreateDatasetFieldTypes, null);
+    }
+
+    public static JsonArrayBuilder json(List metadataBlocks, boolean returnDatasetFieldTypes, boolean printOnlyDisplayedOnCreateDatasetFieldTypes, Dataverse ownerDataverse) {
+        JsonArrayBuilder arrayBuilder = Json.createArrayBuilder();
+        for (MetadataBlock metadataBlock : metadataBlocks) {
+            arrayBuilder.add(returnDatasetFieldTypes ? json(metadataBlock, printOnlyDisplayedOnCreateDatasetFieldTypes, ownerDataverse) : brief.json(metadataBlock));
+        }
+        return arrayBuilder;
+    }
+
     public static String typeClassString(DatasetFieldType typ) {
         if (typ.isControlledVocabulary()) {
             return "controlledVocabulary";
@@ -543,33 +627,64 @@ public static JsonObject json(DatasetField dfv) {
         }
     }
 
-    public static JsonObjectBuilder json(MetadataBlock blk) {
-        JsonObjectBuilder bld = jsonObjectBuilder();
-        bld.add("id", blk.getId());
-        bld.add("name", blk.getName());
-        bld.add("displayName", blk.getDisplayName());
+    public static JsonObjectBuilder json(MetadataBlock metadataBlock) {
+        return json(metadataBlock, false, null);
+    }
 
-        JsonObjectBuilder fieldsBld = jsonObjectBuilder();
-        for (DatasetFieldType df : new TreeSet<>(blk.getDatasetFieldTypes())) {
-            fieldsBld.add(df.getName(), JsonPrinter.json(df));
-        }
+    public static JsonObjectBuilder json(MetadataBlock metadataBlock, boolean printOnlyDisplayedOnCreateDatasetFieldTypes, Dataverse ownerDataverse) {
+        JsonObjectBuilder jsonObjectBuilder = jsonObjectBuilder();
+        jsonObjectBuilder.add("id", metadataBlock.getId());
+        jsonObjectBuilder.add("name", metadataBlock.getName());
+        jsonObjectBuilder.add("displayName", metadataBlock.getDisplayName());
+        jsonObjectBuilder.add("displayOnCreate", metadataBlock.isDisplayOnCreate());
 
-        bld.add("fields", fieldsBld);
+        JsonObjectBuilder fieldsBuilder = Json.createObjectBuilder();
+        Set datasetFieldTypes = new TreeSet<>(metadataBlock.getDatasetFieldTypes());
 
-        return bld;
+        for (DatasetFieldType datasetFieldType : datasetFieldTypes) {
+            Long datasetFieldTypeId = datasetFieldType.getId();
+            boolean requiredAsInputLevelInOwnerDataverse = ownerDataverse != null && ownerDataverse.isDatasetFieldTypeRequiredAsInputLevel(datasetFieldTypeId);
+            boolean includedAsInputLevelInOwnerDataverse = ownerDataverse != null && ownerDataverse.isDatasetFieldTypeIncludedAsInputLevel(datasetFieldTypeId);
+            boolean isNotInputLevelInOwnerDataverse = ownerDataverse != null && !ownerDataverse.isDatasetFieldTypeInInputLevels(datasetFieldTypeId);
+
+            DatasetFieldType parentDatasetFieldType = datasetFieldType.getParentDatasetFieldType();
+            boolean isRequired = parentDatasetFieldType == null ? datasetFieldType.isRequired() : parentDatasetFieldType.isRequired();
+
+            boolean displayCondition = printOnlyDisplayedOnCreateDatasetFieldTypes
+                    ? (datasetFieldType.isDisplayOnCreate() || isRequired || requiredAsInputLevelInOwnerDataverse)
+                    : ownerDataverse == null || includedAsInputLevelInOwnerDataverse || isNotInputLevelInOwnerDataverse;
+
+            if (displayCondition) {
+                fieldsBuilder.add(datasetFieldType.getName(), json(datasetFieldType, ownerDataverse));
+            }
+        }
+
+        jsonObjectBuilder.add("fields", fieldsBuilder);
+        return jsonObjectBuilder;
     }
 
     public static JsonObjectBuilder json(DatasetFieldType fld) {
+        return json(fld, null);
+    }
+
+    public static JsonObjectBuilder json(DatasetFieldType fld, Dataverse ownerDataverse) {
         JsonObjectBuilder fieldsBld = jsonObjectBuilder();
         fieldsBld.add("name", fld.getName());
         fieldsBld.add("displayName", fld.getDisplayName());
+        fieldsBld.add("displayOnCreate", fld.isDisplayOnCreate());
         fieldsBld.add("title", fld.getTitle());
         fieldsBld.add("type", fld.getFieldType().toString());
+        fieldsBld.add("typeClass", typeClassString(fld));
         fieldsBld.add("watermark", fld.getWatermark());
         fieldsBld.add("description", fld.getDescription());
         fieldsBld.add("multiple", fld.isAllowMultiples());
         fieldsBld.add("isControlledVocabulary", fld.isControlledVocabulary());
         fieldsBld.add("displayFormat", fld.getDisplayFormat());
+        fieldsBld.add("displayOrder", fld.getDisplayOrder());
+
+        boolean requiredInOwnerDataverse = ownerDataverse != null && ownerDataverse.isDatasetFieldTypeRequiredAsInputLevel(fld.getId());
+        fieldsBld.add("isRequired", requiredInOwnerDataverse || fld.isRequired());
+
         if (fld.isControlledVocabulary()) {
             // If the field has a controlled vocabulary,
             // add all values to the resulting JSON
@@ -579,40 +694,52 @@ public static JsonObjectBuilder json(DatasetFieldType fld) {
             }
             fieldsBld.add("controlledVocabularyValues", jab);
         }
+
         if (!fld.getChildDatasetFieldTypes().isEmpty()) {
             JsonObjectBuilder subFieldsBld = jsonObjectBuilder();
             for (DatasetFieldType subFld : fld.getChildDatasetFieldTypes()) {
-                subFieldsBld.add(subFld.getName(), JsonPrinter.json(subFld));
+                subFieldsBld.add(subFld.getName(), JsonPrinter.json(subFld, ownerDataverse));
             }
             fieldsBld.add("childFields", subFieldsBld);
         }
 
         return fieldsBld;
     }
+
+    public static JsonObjectBuilder json(FileMetadata fmd){
+        return json(fmd, false, false);
+    }
+
+    public static JsonObjectBuilder json(FileMetadata fmd, boolean returnOwners, boolean printDatasetVersion) {
+        NullSafeJsonBuilder builder = jsonObjectBuilder();
 
-    public static JsonObjectBuilder json(FileMetadata fmd) {
-        return jsonObjectBuilder()
                 // deprecated: .add("category", fmd.getCategory())
-                // TODO: uh, figure out what to do here... it's deprecated
-                // in a sense that there's no longer the category field in the
-                // fileMetadata object; but there are now multiple, oneToMany file
+        // TODO: uh, figure out what to do here... it's deprecated
+        // in a sense that there's no longer the category field in the
+        // fileMetadata object; but there are now multiple, oneToMany file
         // categories - and we probably need to export them too!) -- L.A. 4.5
-                // DONE: catgegories by name
-                .add("description", fmd.getDescription())
+        // DONE: catgegories by name
+        builder.add("description", fmd.getDescription())
                 .add("label", fmd.getLabel()) // "label" is the filename
-                .add("restricted", fmd.isRestricted())
+                .add("restricted", fmd.isRestricted())
                 .add("directoryLabel", fmd.getDirectoryLabel())
                 .add("version", fmd.getVersion())
                 .add("datasetVersionId", fmd.getDatasetVersion().getId())
                 .add("categories", getFileCategories(fmd))
-                .add("dataFile", JsonPrinter.json(fmd.getDataFile(), fmd, false));
+                .add("dataFile", JsonPrinter.json(fmd.getDataFile(), fmd, false, returnOwners));
+
+        if (printDatasetVersion) {
+            builder.add("datasetVersion", json(fmd.getDatasetVersion(), false));
+        }
+
+        return builder;
     }
 
-    public static JsonObjectBuilder json(AuxiliaryFile auxFile) {
+    public static JsonObjectBuilder json(AuxiliaryFile auxFile) {
         return jsonObjectBuilder()
                 .add("formatTag", auxFile.getFormatTag())
                 .add("formatVersion", auxFile.getFormatVersion())
                 // "label" is the filename
-                .add("origin", auxFile.getOrigin())
+                .add("origin", auxFile.getOrigin())
                 .add("isPublic", auxFile.getIsPublic())
                 .add("type", auxFile.getType())
                 .add("contentType", auxFile.getContentType())
@@ -620,11 +747,16 @@ public static JsonObjectBuilder json(AuxiliaryFile auxFile) {
                 .add("checksum", auxFile.getChecksum())
                 .add("dataFile", JsonPrinter.json(auxFile.getDataFile()));
     }
+
     public static JsonObjectBuilder json(DataFile df) {
         return JsonPrinter.json(df, null, false);
     }
 
-    public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider) {
+    public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider){
+        return json(df, fileMetadata, forExportDataProvider, false);
+    }
+
+    public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boolean forExportDataProvider, boolean returnOwners) {
         // File names are no longer stored in the DataFile entity;
         // (they are instead in the FileMetadata (as "labels") - this way
         // the filename can change between versions...
@@ -648,6 +780,7 @@ public static JsonObjectBuilder json(DataFile df, FileMetadata fileMetadata, boo
         String pidString = (filePid!=null)? filePid.asString(): "";
 
         JsonObjectBuilder embargo = df.getEmbargo() != null ? JsonPrinter.json(df.getEmbargo()) : null;
+        JsonObjectBuilder retention = df.getRetention() != null ? JsonPrinter.json(df.getRetention()) : null;
 
         NullSafeJsonBuilder builder = jsonObjectBuilder()
                 .add("id", df.getId())
@@ -660,6 +793,7 @@
                 .add("description", fileMetadata.getDescription())
                 .add("categories", getFileCategories(fileMetadata))
                 .add("embargo", embargo)
+                .add("retention", retention)
                 //.add("released", df.isReleased())
                 .add("storageIdentifier", df.getStorageIdentifier())
                 .add("originalFileFormat", df.getOriginalFileFormat())
@@ -700,6 +834,9 @@
                         ? JsonPrinter.jsonVarGroup(fileMetadata.getVarGroups())
                         : null);
         }
+        if (returnOwners){
+            builder.add("isPartOf", getOwnersFromDvObject(df, fileMetadata.getDatasetVersion()));
+        }
         return builder;
     }
 
@@ -796,9 +933,10 @@ private static JsonArrayBuilder jsonCatStat(Collection catStat
             JsonObjectBuilder catStatObj = Json.createObjectBuilder();
             catStatObj.add("label", stat.getLabel())
                     .add("value", stat.getValue())
-                    .add("isMissing", stat.isMissing())
-                    .add("frequency", stat.getFrequency())
-                    ;
+                    .add("isMissing", stat.isMissing());
+            if(stat.getFrequency()!=null){
+                catStatObj.add("frequency", stat.getFrequency());
+            }
             catArr.add(catStatObj);
         }
         return catArr;
@@ -867,6 +1005,7 @@ public static JsonObjectBuilder json(HarvestingClient harvestingClient) {
                 add("schedule", harvestingClient.isScheduled() ? harvestingClient.getScheduleDescription() : "none").
                 add("status", harvestingClient.isHarvestingNow() ? "inProgress" : "inActive").
                 add("customHeaders", harvestingClient.getCustomHttpHeaders()).
+                add("allowHarvestingMissingCVV", harvestingClient.getAllowHarvestingMissingCVV()).
                 add("lastHarvest", harvestingClient.getLastHarvestTime() == null ? null : harvestingClient.getLastHarvestTime().toString()).
                 add("lastResult", harvestingClient.getLastResult()).
                 add("lastSuccessful", harvestingClient.getLastSuccessfulHarvestTime() == null ? null : harvestingClient.getLastSuccessfulHarvestTime().toString()).
@@ -1060,6 +1199,11 @@ public static JsonObjectBuilder json(Embargo embargo) {
                 embargo.getReason());
     }
 
+    public static JsonObjectBuilder json(Retention retention) {
+        return jsonObjectBuilder().add("dateUnavailable", retention.getDateUnavailable().toString()).add("reason",
+                retention.getReason());
+    }
+
     public static JsonObjectBuilder json(License license) {
         return jsonObjectBuilder()
                 .add("id", license.getId())
@@ -1228,4 +1372,16 @@ private static JsonObjectBuilder jsonLicense(DatasetVersion dsv) {
         }
         return licenseJsonObjectBuilder;
     }
+
+    public static JsonArrayBuilder jsonDataverseFieldTypeInputLevels(List inputLevels) {
+        JsonArrayBuilder jsonArrayOfInputLevels = Json.createArrayBuilder();
+        for (DataverseFieldTypeInputLevel inputLevel : inputLevels) {
+            NullSafeJsonBuilder inputLevelJsonObject = NullSafeJsonBuilder.jsonObjectBuilder();
+            inputLevelJsonObject.add("datasetFieldTypeName", inputLevel.getDatasetFieldType().getName());
+            inputLevelJsonObject.add("required", inputLevel.isRequired());
+            inputLevelJsonObject.add("include", inputLevel.isInclude());
+            jsonArrayOfInputLevels.add(inputLevelJsonObject);
+        }
+        return jsonArrayOfInputLevels;
+    }
 }
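Editor's note: `getOwnersFromDvObject()` above walks the owner chain root-first and then nests inward, so the returned object is the immediate owner and each `isPartOf` points one level closer to the root. A sketch of the shape this produces for a data file whose dataset sits in a collection under the root, built with plain jakarta.json to mirror `addEmbeddedOwnerObject()`'s nesting (all names, identifiers, and the PID are hypothetical):

```java
import jakarta.json.Json;
import jakarta.json.JsonObject;

public class IsPartOfExample {
    public static void main(String[] args) {
        // Innermost owner first (the dataset), nesting outward toward the root.
        JsonObject isPartOf = Json.createObjectBuilder()
                .add("type", "DATASET")
                .add("persistentIdentifier", "doi:10.5072/FK2/EXAMPLE") // hypothetical PID
                .add("identifier", 42)
                .add("version", "1.0") // present only when a DatasetVersion is passed in
                .add("displayName", "Example Dataset")
                .add("isPartOf", Json.createObjectBuilder()
                        .add("type", "DATAVERSE")
                        .add("identifier", "examples") // collection alias
                        .add("displayName", "Examples Collection")
                        .add("isPartOf", Json.createObjectBuilder()
                                .add("type", "DATAVERSE")
                                .add("identifier", "root")
                                .add("displayName", "Root")))
                .build();
        System.out.println(isPartOf);
    }
}
```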
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java
index cf8b64520de..72a1cd2e1eb 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonUtil.java
@@ -1,9 +1,9 @@
 package edu.harvard.iq.dataverse.util.json;
 
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-import com.google.gson.JsonObject;
-
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
 import java.io.StringReader;
 import java.io.StringWriter;
 import java.util.HashMap;
@@ -11,6 +11,8 @@
 import java.util.logging.Logger;
 import jakarta.json.Json;
 import jakarta.json.JsonArray;
+import jakarta.json.JsonException;
+import jakarta.json.JsonObject;
 import jakarta.json.JsonReader;
 import jakarta.json.JsonWriter;
 import jakarta.json.JsonWriterFactory;
@@ -20,17 +22,19 @@
 public class JsonUtil {
 
     private static final Logger logger = Logger.getLogger(JsonUtil.class.getCanonicalName());
 
+    private JsonUtil() {}
+
     /**
      * Make an attempt at pretty printing a String but will return the original
      * string if it isn't JSON or if there is any exception.
      */
     public static String prettyPrint(String jsonString) {
         try {
-            com.google.gson.JsonParser jsonParser = new com.google.gson.JsonParser();
-            JsonObject jsonObject = jsonParser.parse(jsonString).getAsJsonObject();
-            Gson gson = new GsonBuilder().setPrettyPrinting().create();
-            String prettyJson = gson.toJson(jsonObject);
-            return prettyJson;
+            if (jsonString.trim().startsWith("{")) {
+                return prettyPrint(getJsonObject(jsonString));
+            } else {
+                return prettyPrint(getJsonArray(jsonString));
+            }
         } catch (Exception ex) {
             logger.info("Returning original string due to exception: " + ex);
             return jsonString;
@@ -48,7 +52,7 @@ public static String prettyPrint(JsonArray jsonArray) {
         return stringWriter.toString();
     }
 
-    public static String prettyPrint(jakarta.json.JsonObject jsonObject) {
+    public static String prettyPrint(JsonObject jsonObject) {
         Map config = new HashMap<>();
         config.put(JsonGenerator.PRETTY_PRINTING, true);
         JsonWriterFactory jsonWriterFactory = Json.createWriterFactory(config);
@@ -58,16 +62,69 @@ public static String prettyPrint(jakarta.json.JsonObject jsonObject) {
         }
         return stringWriter.toString();
     }
-
-    public static jakarta.json.JsonObject getJsonObject(String serializedJson) {
+
+    /**
+     * Return the contents of the string as a JSON object.
+     * This method closes its resources when an exception occurs, but does
+     * not catch any exceptions.
+     * @param serializedJson the JSON object serialized as a {@code String}
+     * @throws JsonException when parsing fails.
+     * @see #getJsonObject(InputStream)
+     * @see #getJsonObjectFromFile(String)
+     * @see #getJsonArray(String)
+     */
+    public static JsonObject getJsonObject(String serializedJson) {
         try (StringReader rdr = new StringReader(serializedJson)) {
             try (JsonReader jsonReader = Json.createReader(rdr)) {
                 return jsonReader.readObject();
             }
         }
     }
-
-    public static jakarta.json.JsonArray getJsonArray(String serializedJson) {
+
+    /**
+     * Return the contents of the {@link InputStream} as a JSON object.
+     *
+     * This method closes its resources when an exception occurs, but does
+     * not catch any exceptions.
+     * The caller of this method is responsible for closing the provided stream.
+     * @param stream the input stream to read from
+     * @throws JsonException when parsing fails.
+     * @see #getJsonObject(String)
+     * @see #getJsonObjectFromFile(String)
+     */
+    public static JsonObject getJsonObject(InputStream stream) {
+        try (JsonReader jsonReader = Json.createReader(stream)) {
+            return jsonReader.readObject();
+        }
+    }
+
+    /**
+     * Return the contents of the file as a JSON object.
+     * This method closes its resources when an exception occurs, but does
+     * not catch any exceptions.
+     * @param fileName the name of the file to read from
+     * @throws FileNotFoundException when the file cannot be opened for reading
+     * @throws JsonException when parsing fails.
+     * @see #getJsonObject(String)
+     * @see #getJsonObject(InputStream)
+     */
+    public static JsonObject getJsonObjectFromFile(String fileName) throws IOException {
+        try (FileReader rdr = new FileReader(fileName)) {
+            try (JsonReader jsonReader = Json.createReader(rdr)) {
+                return jsonReader.readObject();
+            }
+        }
+    }
+
+    /**
+     * Return the contents of the string as a JSON array.
+     * This method closes its resources when an exception occurs, but does
+     * not catch any exceptions.
+     * @param serializedJson the JSON array serialized as a {@code String}
+     * @throws JsonException when parsing fails.
+     * @see #getJsonObject(String)
+     */
+    public static JsonArray getJsonArray(String serializedJson) {
         try (StringReader rdr = new StringReader(serializedJson)) {
             try (JsonReader jsonReader = Json.createReader(rdr)) {
                 return jsonReader.readArray();
diff --git a/src/main/java/edu/harvard/iq/dataverse/validation/EMailValidator.java b/src/main/java/edu/harvard/iq/dataverse/validation/EMailValidator.java
index 624e49623f2..446f55a193f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/validation/EMailValidator.java
+++ b/src/main/java/edu/harvard/iq/dataverse/validation/EMailValidator.java
@@ -1,5 +1,6 @@
 package edu.harvard.iq.dataverse.validation;
 
+import edu.harvard.iq.dataverse.settings.JvmSettings;
 import jakarta.validation.ConstraintValidator;
 import jakarta.validation.ConstraintValidatorContext;
 
@@ -10,7 +11,7 @@
  * @author skraffmi
  */
 public class EMailValidator implements ConstraintValidator {
-
+
     @Override
     public boolean isValid(String value, ConstraintValidatorContext context) {
         return isEmailValid(value);
@@ -23,6 +24,12 @@ public boolean isValid(String value, ConstraintValidatorContext context) {
      * @return true when valid, false when invalid (null = valid!)
      */
     public static boolean isEmailValid(String value) {
-        return value == null || EmailValidator.getInstance().isValid(value);
+        // Must be looked up here - otherwise changes are not picked up (tests, live config, ...)
+        final boolean mtaSupportsUTF8 = JvmSettings.MAIL_MTA_SUPPORT_UTF8.lookup(Boolean.class);
+        return value == null || (EmailValidator.getInstance().isValid(value) &&
+            // If the MTA isn't able to handle UTF-8 mail addresses following RFC 6530/6531/6532, we can only declare
+            // mail addresses using 7bit ASCII (RFC 821) as valid.
+            // Beyond scope for Apache Commons Validator, see also https://issues.apache.org/jira/browse/VALIDATOR-487
+            (value.codePoints().noneMatch(c -> c > 127) || mtaSupportsUTF8) );
     }
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/validation/URLValidator.java b/src/main/java/edu/harvard/iq/dataverse/validation/URLValidator.java
index 285f34d3f8c..8fde76d84e1 100644
--- a/src/main/java/edu/harvard/iq/dataverse/validation/URLValidator.java
+++ b/src/main/java/edu/harvard/iq/dataverse/validation/URLValidator.java
@@ -41,7 +41,7 @@ public static boolean isURLValid(String value) {
      * @return true when valid (null is also valid) or false
      */
     public static boolean isURLValid(String value, String[] schemes) {
-        UrlValidator urlValidator = new UrlValidator(schemes);
+        UrlValidator urlValidator = new UrlValidator(schemes, UrlValidator.ALLOW_2_SLASHES);
         return value == null || urlValidator.isValid(value);
     }
diff --git a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java
index 47f24c9b8bd..757d447b60a 100644
--- a/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/workflow/WorkflowServiceBean.java
@@ -1,8 +1,10 @@
 package edu.harvard.iq.dataverse.workflow;
 
+import edu.harvard.iq.dataverse.Dataset;
 import edu.harvard.iq.dataverse.DatasetLock;
 import edu.harvard.iq.dataverse.DatasetServiceBean;
 import edu.harvard.iq.dataverse.DataverseRequestServiceBean;
+import edu.harvard.iq.dataverse.DvObjectServiceBean;
 import edu.harvard.iq.dataverse.EjbDataverseEngine;
 import edu.harvard.iq.dataverse.RoleAssigneeServiceBean;
 import edu.harvard.iq.dataverse.UserNotification;
@@ -58,6 +60,9 @@ public class WorkflowServiceBean {
 
     @EJB
     DatasetServiceBean datasets;
+
+    @EJB
+    DvObjectServiceBean dvObjects;
 
     @EJB
     SettingsServiceBean settings;
@@ -387,16 +392,11 @@ private void workflowCompleted(Workflow wf, WorkflowContext ctxt) {
                 //Now lock for FinalizePublication - this block mirrors that in PublishDatasetCommand
                 AuthenticatedUser user = ctxt.getRequest().getAuthenticatedUser();
                 DatasetLock lock = new DatasetLock(DatasetLock.Reason.finalizePublication, user);
-                lock.setDataset(ctxt.getDataset());
-                String currentGlobalIdProtocol = settings.getValueForKey(SettingsServiceBean.Key.Protocol, "");
-                String currentGlobalAuthority= settings.getValueForKey(SettingsServiceBean.Key.Authority, "");
-                String dataFilePIDFormat = settings.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat, "DEPENDENT");
+                Dataset dataset = ctxt.getDataset();
+                lock.setDataset(dataset);
                 boolean registerGlobalIdsForFiles =
-                        (currentGlobalIdProtocol.equals(ctxt.getDataset().getProtocol()) || dataFilePIDFormat.equals("INDEPENDENT"))
-                        && systemConfig.isFilePIDsEnabledForCollection(ctxt.getDataset().getOwner());
-                if ( registerGlobalIdsForFiles ){
-                    registerGlobalIdsForFiles = currentGlobalAuthority.equals( ctxt.getDataset().getAuthority() );
-                }
+                        systemConfig.isFilePIDsEnabledForCollection(ctxt.getDataset().getOwner()) &&
+                        dvObjects.getEffectivePidGenerator(dataset).canCreatePidsLike(dataset.getGlobalId());
 
                 boolean validatePhysicalFiles = systemConfig.isDatafileValidationOnPublishEnabled();
                 String info = "Publishing the dataset; ";
diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties
index 79887f7e76c..0325a47f626 100644
--- a/src/main/java/propertyFiles/Bundle.properties
+++ b/src/main/java/propertyFiles/Bundle.properties
@@ -15,6 +15,7 @@ embargoed=Embargoed
 embargoedaccess=Embargoed with Access
 embargoedandrestricted=Embargoed and then Restricted
 embargoedandrestrictedaccess=Embargoed and then Restricted with Access
+retentionExpired=Retention Period Expired
 incomplete=Incomplete metadata
 valid=Valid
 find=Find
@@ -30,6 +31,12 @@ embargoed.wasthrough=Was embargoed until
 embargoed.willbeuntil=Draft: will be embargoed until
 embargo.date.invalid=Date is outside the allowed range: ({0} to {1})
 embargo.date.required=An embargo date is required
+retention.after=Was retained until
+retention.isfrom=Is retained until
+retention.willbeafter=Draft: will be retained until
+retention.enddateinfo=after which it will no longer be accessible
+retention.date.invalid=Date is outside the allowed range: ({0} to {1})
+retention.date.required=A retention period end date is required
 cancel=Cancel
 ok=OK
 saveChanges=Save Changes
@@ -56,6 +63,7 @@ storage=Storage
 curationLabels=Curation Labels
 metadataLanguage=Dataset Metadata Language
 guestbookEntryOption=Guestbook Entry Option
+pidProviderOption=PID Provider Option
 createDataverse=Create Dataverse
 remove=Remove
 done=Done
@@ -64,6 +72,7 @@ manager=Manager
 curator=Curator
 explore=Explore
 download=Download
+transfer=Globus Transfer
 downloadOriginal=Original Format
 downloadArchival=Archival Format (.tab)
 deaccession=Deaccession
@@ -154,6 +163,7 @@ contact.support=Support
 contact.from=From
 contact.from.required=User email is required.
 contact.from.invalid=Email is invalid.
+contact.from.emailPlaceholder=name@email.xyz
 contact.subject=Subject
 contact.subject.required=Subject is required.
 contact.subject.selectTab.top=Select subject...
@@ -183,6 +193,7 @@ contact.context.file.intro={0}\n\nYou have just been sent the following message
 contact.context.file.ending=\n\n---\n\n{0}\n{1}\n\nGo to file {2}/file.xhtml?fileId={3}\n\nYou received this email because you have been listed as a contact for the dataset. If you believe this was an error, please contact {4} at {5}. To respond directly to the individual who sent the message, simply reply to this email.
 contact.context.support.intro={0},\n\nThe following message was sent from {1}.\n\n---\n\n
 contact.context.support.ending=\n\n---\n\nMessage sent from Support contact form.
+contact.sent=Message sent.
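Editor's note on the EMailValidator change above: the new guard only accepts non-ASCII (internationalized) addresses when the MTA advertises SMTPUTF8 support. A standalone approximation of the rule, without the `JvmSettings` lookup (the helper name and hard-coded flag are illustrative):

```java
import org.apache.commons.validator.routines.EmailValidator;

public class EmailCheckExample {
    // Mirrors the codepoint gate in EMailValidator.isEmailValid(): any code
    // point above 127 makes the address invalid unless the MTA handles UTF-8.
    static boolean isValid(String value, boolean mtaSupportsUTF8) {
        return value == null || (EmailValidator.getInstance().isValid(value)
                && (value.codePoints().noneMatch(c -> c > 127) || mtaSupportsUTF8));
    }

    public static void main(String[] args) {
        System.out.println(isValid("jane@example.org", false));   // plain ASCII: valid either way
        System.out.println(isValid("müller@example.org", false)); // non-ASCII without SMTPUTF8: invalid
        System.out.println(isValid("müller@example.org", true));  // accepted once the MTA supports RFC 6531
    }
}
```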
# dataverseuser.xhtml account.info=Account Information @@ -628,7 +639,7 @@ harvestserver.tab.header.description=Description harvestserver.tab.header.definition=Definition Query harvestserver.tab.col.definition.default=All Published Local Datasets harvestserver.tab.header.stats=Datasets -harvestserver.tab.col.stats.empty=No records (empty set) +harvestserver.tab.col.stats.empty=No active records ({2} {2, choice, 0#records|1#record|2#records} marked as deleted) harvestserver.tab.col.stats.results={0} {0, choice, 0#datasets|1#dataset|2#datasets} ({1} {1, choice, 0#records|1#record|2#records} exported, {2} marked as deleted) harvestserver.tab.header.action=Actions harvestserver.tab.header.action.btn.export=Run Export @@ -746,8 +757,8 @@ dashboard.card.datamove.dataset.command.error.indexingProblem=Dataset could not notification.email.create.dataverse.subject={0}: Your dataverse has been created notification.email.create.dataset.subject={0}: Dataset "{1}" has been created notification.email.dataset.created.subject={0}: Dataset "{1}" has been created -notification.email.request.file.access.subject={0}: Access has been requested for a restricted file in dataset "{1}" -notification.email.requested.file.access.subject={0}: You have requested access to a restricted file in dataset "{1}" +notification.email.request.file.access.subject={0}: {1} {2} ({3}) requested access to dataset "{4}" +notification.email.requested.file.access.subject={0}: You have requested access to a restricted file in dataset "{1}" notification.email.grant.file.access.subject={0}: You have been granted access to a restricted file notification.email.rejected.file.access.subject={0}: Your request for access to a restricted file has been rejected notification.email.submit.dataset.subject={0}: Dataset "{1}" has been submitted for review @@ -780,10 +791,12 @@ notification.email.rejectFileAccess=Your request for access was rejected for the notification.email.createDataverse=Your new dataverse named {0} (view at {1} ) was created in {2} (view at {3} ). To learn more about what you can do with your dataverse, check out the Dataverse Management - User Guide at {4}/{5}/user/dataverse-management.html . # Bundle file editors, please note that "notification.email.createDataset" is used in a unit test notification.email.createDataset=Your new dataset named {0} (view at {1} ) was created in {2} (view at {3} ). To learn more about what you can do with a dataset, check out the Dataset Management - User Guide at {4}/{5}/user/dataset-management.html . -notification.email.wasSubmittedForReview={0} (view at {1} ) was submitted for review to be published in {2} (view at {3} ). Don''t forget to publish it or send it back to the contributor, {4} ({5})\! -notification.email.wasReturnedByReviewer={0} (view at {1} ) was returned by the curator of {2} (view at {3} ). -notification.email.wasPublished={0} (view at {1} ) was published in {2} (view at {3} ). -notification.email.publishFailedPidReg={0} (view at {1} ) in {2} (view at {3} ) could not be published due to a failure to register, or update the Global Identifier for the dataset or one of the files in it. Contact support if this continues to happen. +notification.email.wasSubmittedForReview=Your dataset named {0} (view at {1} ) was submitted for review to be published in {2} (view at {3} ). Don''t forget to publish it or send it back to the contributor, {4} ({5})\! +notification.email.wasReturnedByReviewer=Your dataset named {0} (view at {1} ) was returned by the curator of {2} (view at {3} ). 
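Editor's note: the `{2, choice, 0#records|1#record|2#records}` syntax used in the updated harvestserver strings above is `java.text.MessageFormat`'s nested ChoiceFormat: `0#` covers 0, `1#` covers exactly 1, and `2#` covers 2 or more. A quick illustration:

```java
import java.text.MessageFormat;

public class ChoiceFormatExample {
    public static void main(String[] args) {
        MessageFormat fmt = new MessageFormat(
                "No active records ({2} {2,choice,0#records|1#record|2#records} marked as deleted)");
        // Only argument index 2 is used by this pattern.
        System.out.println(fmt.format(new Object[]{null, null, 1}));
        // -> No active records (1 record marked as deleted)
        System.out.println(fmt.format(new Object[]{null, null, 5}));
        // -> No active records (5 records marked as deleted)
    }
}
```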
+notification.email.wasReturnedByReviewerReason=Here is the curator comment: {0} +notification.email.wasReturnedByReviewer.collectionContacts=You may contact the collection administrator for more information: {0} +notification.email.wasPublished=Your dataset named {0} (view at {1} ) was published in {2} (view at {3} ). +notification.email.publishFailedPidReg=Your dataset named {0} (view at {1} ) in {2} (view at {3} ) could not be published due to a failure to register, or update the Global Identifier for the dataset or one of the files in it. Contact support if this continues to happen. notification.email.closing=\n\nYou may contact us for support at {0}.\n\nThank you,\n{1} notification.email.closing.html=

    You may contact us for support at {0}.

    Thank you,
    {1} notification.email.assignRole=You are now {0} for the {1} "{2}" (view at {3} ). @@ -828,6 +841,7 @@ dataverse.curationLabels.disabled=Disabled dataverse.category=Category dataverse.category.title=The type that most closely reflects this dataverse. dataverse.guestbookentryatrequest.title=Whether Guestbooks are displayed to users when they request file access or when they download files. +dataverse.pidProvider.title=The source of PIDs (DOIs, Handles, etc.) when a new PID is created. dataverse.type.selectTab.top=Select one... dataverse.type.selectTab.researchers=Researcher dataverse.type.selectTab.researchProjects=Research Project @@ -872,7 +886,7 @@ dataverse.option.deleteDataverse=Delete Dataverse dataverse.publish.btn=Publish dataverse.publish.header=Publish Dataverse dataverse.nopublished=No Published Dataverses -dataverse.nopublished.tip=In order to use this feature you must have at least one published dataverse. +dataverse.nopublished.tip=In order to use this feature you must have at least one published or linked dataverse. dataverse.contact=Email Dataverse Contact dataverse.link=Link Dataverse dataverse.link.btn.tip=Link to Your Dataverse @@ -921,12 +935,19 @@ dataverse.update.failure=This dataverse was not able to be updated. dataverse.selected=Selected dataverse.listing.error=Fatal error trying to list the contents of the dataverse. Please report this error to the Dataverse administrator. dataverse.datasize=Total size of the files stored in this dataverse: {0} bytes +dataverse.storage.quota.allocation=Total quota allocation for this collection: {0} bytes +dataverse.storage.quota.notdefined=No quota defined for this collection +dataverse.storage.quota.updated=Storage quota successfully set for the collection +dataverse.storage.quota.deleted=Storage quota successfully disabled for the collection +dataverse.storage.quota.superusersonly=Only superusers can change storage quotas. +dataverse.storage.use=Total recorded size of the files stored in this collection (user-uploaded files plus the versions in the archival tab-delimited format when applicable): {0} bytes dataverse.datasize.ioerror=Fatal IO error while trying to determine the total size of the files stored in the dataverse. Please report this error to the Dataverse administrator. dataverse.inherited=(inherited from enclosing Dataverse) dataverse.default=(Default) dataverse.metadatalanguage.setatdatasetcreation=Chosen at Dataset Creation dataverse.guestbookentry.atdownload=Guestbook Entry At Download dataverse.guestbookentry.atrequest=Guestbook Entry At Access Request +dataverse.updateinputlevels.error.invalidfieldtypename=Invalid dataset field type name: {0} # rolesAndPermissionsFragment.xhtml # advanced.xhtml @@ -939,18 +960,16 @@ advanced.search.header.datasets=Datasets advanced.search.header.files=Files advanced.search.files.name.tip=The name given to identify the file. advanced.search.files.description.tip=A summary describing the file and its variables. -advanced.search.files.persistentId.tip=The persistent identifier for the file. advanced.search.files.persistentId=Data File Persistent ID -advanced.search.files.persistentId.tip=The unique persistent identifier for a data file, which can be a Handle or DOI in Dataverse. +advanced.search.files.persistentId.tip=The unique persistent identifier for the file. advanced.search.files.fileType=File Type advanced.search.files.fileType.tip=The file type, e.g. Comma Separated Values, Plain Text, R, etc. 
advanced.search.files.variableName=Variable Name advanced.search.files.variableName.tip=The name of the variable's column in the data frame. advanced.search.files.variableLabel=Variable Label advanced.search.files.variableLabel.tip=A short description of the variable. -advanced.search.datasets.persistentId.tip=The persistent identifier for the Dataset. advanced.search.datasets.persistentId=Persistent Identifier -advanced.search.datasets.persistentId.tip=The Dataset's unique persistent identifier, either a DOI or Handle +advanced.search.datasets.persistentId.tip=The persistent identifier for the Dataset. advanced.search.files.fileTags=File Tags advanced.search.files.fileTags.tip=Terms such "Documentation", "Data", or "Code" that have been applied to files. @@ -1006,12 +1025,15 @@ dataverse.results.btn.sort.option.relevance=Relevance dataverse.results.cards.foundInMetadata=Found in Metadata Fields: dataverse.results.cards.files.tabularData=Tabular Data dataverse.results.solrIsDown=Please note: Due to an internal error, browsing and searching is not available. +dataverse.results.solrIsTemporarilyUnavailable=Search Engine service (Solr) is temporarily unavailable because of high load. Please try again later. +dataverse.results.solrIsTemporarilyUnavailable.extraText=Note that all the datasets that are part of this collection are accessible via direct links and registered DOIs. +dataverse.results.solrFacetsDisabled=Facets temporarily unavailable. dataverse.theme.title=Theme dataverse.theme.inheritCustomization.title=For this dataverse, use the same theme as the parent dataverse. dataverse.theme.inheritCustomization.label=Inherit Theme dataverse.theme.inheritCustomization.checkbox=Inherit theme from {0} dataverse.theme.logo=Logo -dataverse.theme.logo.tip=Supported image types are JPG, TIF, or PNG and should be no larger than 500 KB. The maximum display size for an image file in a dataverse's theme is 940 pixels wide by 120 pixels high. +dataverse.theme.logo.tip=Supported image types are JPG and PNG, and must be no larger than 500 KB. The maximum display size for an image file in a dataverse's theme is 940 pixels wide by 120 pixels high. dataverse.theme.logo.format=Logo Format dataverse.theme.logo.format.selectTab.square=Square dataverse.theme.logo.format.selectTab.rectangle=Rectangle @@ -1376,6 +1398,8 @@ dataset.guestbookResponse.respondent=Respondent dataset.guestbookResponse.question=Q dataset.guestbookResponse.answer=A dataset.guestbookResponse.noResponse=(No Response) +dataset.guestbookResponse.requestor.id=authenticatedUserId +dataset.guestbookResponse.requestor.identifier=authenticatedUserIdentifier # dataset.xhtml @@ -1388,6 +1412,7 @@ dataset.accessBtn.header.explore=Explore Options dataset.accessBtn.header.configure=Configure Options dataset.accessBtn.header.compute=Compute Options dataset.accessBtn.download.size=ZIP ({0}) +dataset.accessBtn.transfer.size=({0}) dataset.accessBtn.too.big=The dataset is too large to download. Please select the files you need from the files table. dataset.accessBtn.original.too.big=The dataset is too large to download in the original format. Please select the files you need from the files table. dataset.accessBtn.archival.too.big=The dataset is too large to download in the archival format. Please select the files you need from the files table.
@@ -1462,6 +1487,7 @@ dataset.status.failure.notallowed=Status update failed - label not allowed dataset.status.failure.disabled=Status labeling disabled for this dataset dataset.status.failure.isReleased=Latest version of dataset is already released. Status can only be set on draft versions dataset.rejectMessage=Return this dataset to contributor for modification. +dataset.rejectMessageReason=The reason for return entered below will be sent by email to the author. dataset.rejectMessage.label=Return to Author Reason dataset.rejectWatermark=Please enter a reason for returning this dataset to its author(s). dataset.reject.enterReason.error=Reason for return to author is required. @@ -1469,6 +1495,7 @@ dataset.reject.success=This dataset has been sent back to the contributor. dataset.reject.failure=Dataset Submission Return Failed - {0} dataset.reject.datasetNull=Cannot return the dataset to the author(s) because it is null. dataset.reject.datasetNotInReview=This dataset cannot be return to the author(s) because the latest version is not In Review. The author(s) needs to click Submit for Review first. +dataset.reject.commentNull=You must enter a reason for returning a dataset to the author(s). dataset.publish.tip=Are you sure you want to publish this dataset? Once you do so it must remain published. dataset.publish.terms.tip=This version of the dataset will be published with the following terms: dataset.publish.terms.help.tip=To change the terms for this version, click the Cancel button and go to the Terms tab for this dataset. @@ -1623,6 +1650,7 @@ dataset.metadata.alternativePersistentId.tip=A previously used persistent identi dataset.metadata.invalidEntry=is not a valid entry. dataset.metadata.invalidDate=is not a valid date. "yyyy" is a supported format. dataset.metadata.invalidNumber=is not a valid number. +dataset.metadata.invalidGeospatialCoordinates=has invalid coordinates. East must be greater than West and North must be greater than South. Missing values are NOT allowed. dataset.metadata.invalidInteger=is not a valid integer. dataset.metadata.invalidURL=is not a valid URL. dataset.metadata.invalidEmail=is not a valid email address. @@ -1646,15 +1674,19 @@ dataset.noSelectedFiles=Please select one or more files. dataset.noSelectedFilesForDownload=Please select a file or files to be downloaded. dataset.noSelectedFilesForRequestAccess=Please select a file or files for access request. dataset.embargoedSelectedFilesForRequestAccess=Embargoed files cannot be accessed. Please select an unembargoed file or files for your access request. -dataset.inValidSelectedFilesForDownload=Restricted Files Selected -dataset.inValidSelectedFilesForDownloadWithEmbargo=Embargoed and/or Restricted Files Selected -dataset.noValidSelectedFilesForDownload=The selected file(s) may not be downloaded because you have not been granted access. -dataset.mixedSelectedFilesForDownload=The restricted file(s) selected may not be downloaded because you have not been granted access. -dataset.mixedSelectedFilesForDownloadWithEmbargo=The embargoed and/or restricted file(s) selected may not be downloaded because you have not been granted access. 
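Editor's note: the new `dataset.metadata.invalidGeospatialCoordinates` message above states the bounding-box rule in prose (East greater than West, North greater than South, no missing values). A sketch of that rule as a check; the method name and signature are illustrative, not the PR's validator:

```java
public class GeoBoxCheckExample {
    // Hypothetical helper mirroring the message's rule.
    static boolean isValidBox(Double west, Double east, Double south, Double north) {
        if (west == null || east == null || south == null || north == null) {
            return false; // missing values are NOT allowed
        }
        return east > west && north > south;
    }

    public static void main(String[] args) {
        System.out.println(isValidBox(-10.0, 10.0, 40.0, 50.0)); // true
        System.out.println(isValidBox(10.0, -10.0, 40.0, 50.0)); // false: East <= West
        System.out.println(isValidBox(-10.0, 10.0, null, 50.0)); // false: missing value
    }
}
```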
- +dataset.inValidSelectedFilesForDownload=Inaccessible Files Selected +dataset.inValidSelectedFilesForDownloadWithEmbargo=Inaccessible Files Selected +dataset.inValidSelectedFilesForTransferWithEmbargo=Inaccessible Files Selected +dataset.noValidSelectedFilesForDownload=The selected file(s) may not be downloaded because you have not been granted access or the file(s) have a retention period that has expired or the files can only be transferred via Globus. +dataset.noValidSelectedFilesForTransfer=The selected file(s) may not be transferred because you have not been granted access or the file(s) have a retention period that has expired or the files are not Globus accessible. +dataset.mixedSelectedFilesForDownload=The selected file(s) may not be downloaded because you have not been granted access or the file(s) have a retention period that has expired. +dataset.mixedSelectedFilesForDownloadWithEmbargo=Any embargoed and/or restricted file(s) selected may not be downloaded because you have not been granted access. Some files may have a retention period that has expired. Some files may only be accessible via Globus. +dataset.mixedSelectedFilesForTransfer=Some file(s) cannot be transferred. (They are restricted, embargoed, with an expired retention period, or not Globus accessible.) +dataset.inValidSelectedFilesForTransfer=Ineligible Files Selected dataset.downloadUnrestricted=Click Continue to download the files you have access to download. +dataset.transferUnrestricted=Click Continue to transfer the eligible files. -dataset.requestAccessToRestrictedFiles=You may request access to the restricted file(s) by clicking the Request Access button. +dataset.requestAccessToRestrictedFiles=You may request access to any restricted file(s) by clicking the Request Access button. dataset.requestAccessToRestrictedFilesWithEmbargo=Embargoed files cannot be accessed during the embargo period. If your selection contains restricted files, you may request access to them by clicking the Request Access button. dataset.privateurl.infoMessageAuthor=Privately share this dataset before it is published: {0} dataset.privateurl.infoMessageReviewer=This unpublished dataset is being privately shared. @@ -1828,6 +1860,18 @@ file.editEmbargoDialog.newReason=Add a reason... file.editEmbargoDialog.newDate=Select the embargo end-date file.editEmbargoDialog.remove=Remove existing embargo(es) on selected files +file.retention=Retention Period +file.editRetention=Edit Retention Period +file.editRetention.add=Add or Change +file.editRetention.delete=Remove +file.editRetentionDialog.tip=Edit the planned retention period for the selected file or files. Once this dataset version is published, you will need to contact an administrator to change the retention period end date or reason of the file or files. \n After the retention period expires the files become unavailable for download. +file.editRetentionDialog.some.tip=One or more of the selected files have already been published. Contact an administrator to change the retention period date or reason of the file or files. +file.editRetentionDialog.none.tip=The selected file or files have already been published. Contact an administrator to change the retention period date or reason of the file or files. +file.editRetentionDialog.partial.tip=Any changes you make here will not be made to these files. +file.editRetentionDialog.reason.tip=Enter a short reason why this retention period exists +file.editRetentionDialog.newReason=Add a reason...
+file.editRetentionDialog.newDate=Select the retention period end date +file.editRetentionDialog.remove=Remove existing retention period(s) on selected files file.setThumbnail=Set Thumbnail file.setThumbnail.header=Set Dataset Thumbnail @@ -1840,6 +1884,7 @@ file.advancedIngestOptions=Advanced Ingest Options file.assignedDataverseImage.success={0} has been saved as the thumbnail for this dataset. file.assignedTabFileTags.success=The tags were successfully added for {0}. file.assignedEmbargo.success=An Embargo was successfully added for {0}. +file.assignedRetention.success=A Retention Period was successfully added for {0}. file.tabularDataTags=Tabular Data Tags file.tabularDataTags.tip=Select a tag to describe the type(s) of data this is (survey, time series, geospatial, etc). file.spss-savEncoding=Language Encoding @@ -1986,7 +2031,8 @@ file.deleteFileDialog.immediate=The file will be deleted after you click on the file.deleteFileDialog.multiple.immediate=The file(s) will be deleted after you click on the Delete button. file.deleteFileDialog.header=Delete Files file.deleteFileDialog.failed.tip=Files will not be removed from previously published versions of the dataset. -file.deaccessionDialog.tip=Once you deaccession this dataset it will no longer be viewable by the public. +file.deaccessionDialog.tip.permanent=Deaccession is permanent. +file.deaccessionDialog.tip=This dataset will no longer be public and a tombstone will display the reason for deaccessioning.&#160;
    Please read the documentation if you have any questions. file.deaccessionDialog.version=Version file.deaccessionDialog.reason.question1=Which version(s) do you want to deaccession? file.deaccessionDialog.reason.question2=What is the reason for deaccession? @@ -2000,8 +2046,8 @@ file.deaccessionDialog.reason.selectItem.other=Other (Please type reason in spac file.deaccessionDialog.enterInfo=Please enter additional information about the reason for deaccession. file.deaccessionDialog.leaveURL=If applicable, please leave a URL where this dataset can be accessed after deaccessioning. file.deaccessionDialog.leaveURL.watermark=Optional dataset site, http://... -file.deaccessionDialog.deaccession.tip=Are you sure you want to deaccession? The selected version(s) will no longer be viewable by the public. -file.deaccessionDialog.deaccessionDataset.tip=Are you sure you want to deaccession this dataset? It will no longer be viewable by the public. +file.deaccessionDialog.deaccession.tip=Are you sure you want to deaccession? This is permanent and the selected version(s) will no longer be viewable by the public. +file.deaccessionDialog.deaccessionDataset.tip=Are you sure you want to deaccession this dataset? This is permanent and it will no longer be viewable by the public. file.deaccessionDialog.dialog.selectVersion.error=Please select version(s) for deaccessioning. file.deaccessionDialog.dialog.reason.error=Please select reason for deaccessioning. file.deaccessionDialog.dialog.url.error=Please enter valid forwarding URL. @@ -2097,7 +2143,7 @@ dataset.thumbnailsAndWidget.thumbnails.title=Thumbnail dataset.thumbnailsAndWidget.widgets.title=Widgets dataset.thumbnailsAndWidget.thumbnailImage=Thumbnail Image dataset.thumbnailsAndWidget.thumbnailImage.title=The logo or image file you wish to display as the thumbnail of this dataset. -dataset.thumbnailsAndWidget.thumbnailImage.tip=Supported image types are JPG, TIF, or PNG and should be no larger than {0} KB. The maximum display size for an image file as a dataset thumbnail is 48 pixels wide by 48 pixels high. +dataset.thumbnailsAndWidget.thumbnailImage.tip=Supported image types are JPG and PNG, and must be no larger than {0} KB. The maximum display size for an image file as a dataset thumbnail is 140 pixels wide by 140 pixels high. dataset.thumbnailsAndWidget.thumbnailImage.default=Default Icon dataset.thumbnailsAndWidget.thumbnailImage.selectAvailable=Select Available File dataset.thumbnailsAndWidget.thumbnailImage.selectThumbnail=Select Thumbnail @@ -2158,6 +2204,8 @@ file.metadataTab.fileMetadata.type.label=Type file.metadataTab.fileMetadata.description.label=Description file.metadataTab.fileMetadata.publicationDate.label=Publication Date file.metadataTab.fileMetadata.embargoReason.label=Embargo Reason +file.metadataTab.fileMetadata.retentionDate.label=Retention End Date +file.metadataTab.fileMetadata.retentionReason.label=Retention Reason file.metadataTab.fileMetadata.metadataReleaseDate.label=Metadata Release Date file.metadataTab.fileMetadata.depositDate.label=Deposit Date file.metadataTab.fileMetadata.hierarchy.label=File Path @@ -2187,6 +2235,15 @@ ingest.csv.lineMismatch=Mismatch between line counts in first and final passes!, ingest.csv.recordMismatch=Reading mismatch, line {0} of the Data file: {1} delimited values expected, {2} found. ingest.csv.nullStream=Stream can't be null.
+file.ingest=Ingest +file.uningest=Uningest +file.ingest.alreadyIngestedWarning=This file has already been ingested +file.ingest.ingestInProgressWarning=Ingestion of this file is already in progress +file.ingest.cantIngestFileWarning=Ingest not supported for this file type +file.ingest.ingestQueued=Ingestion has been requested +file.ingest.cantUningestFileWarning=This file cannot be uningested +file.uningest.complete=Uningestion of this file has been completed + # editdatafile.xhtml # editFilesFragment.xhtml @@ -2282,20 +2339,6 @@ citationFrame.banner.closeIcon=Close this message, go to dataset citationFrame.banner.countdownMessage= This message will close in citationFrame.banner.countdownMessage.seconds=seconds -# Friendly AuthenticationProvider names -authenticationProvider.name.builtin=Dataverse -authenticationProvider.name.null=(provider is unknown) -authenticationProvider.name.github=GitHub -authenticationProvider.name.google=Google -authenticationProvider.name.orcid=ORCiD -authenticationProvider.name.orcid-sandbox=ORCiD Sandbox -authenticationProvider.name.shib=Shibboleth -ingest.csv.invalidHeader=Invalid header row. One of the cells is empty. -ingest.csv.lineMismatch=Mismatch between line counts in first and final passes!, {0} found on first pass, but {1} found on second. -ingest.csv.recordMismatch=Reading mismatch, line {0} of the Data file: {1} delimited values expected, {2} found. -ingest.csv.nullStream=Stream can't be null. -citationFrame.banner.countdownMessage.seconds=seconds - #file-edit-popup-fragment.xhtml #editFilesFragment.xhtml dataset.access.accessHeader=Restrict Access dataset.access.accessHeader.invalid.state=Define Data Access @@ -2379,10 +2422,6 @@ api.prov.error.freeformMissingJsonKey=The JSON object you send must have a key c api.prov.error.freeformNoText=No provenance free form text available for this file. api.prov.error.noDataFileFound=Could not find a file based on ID. -bagit.sourceOrganization=Dataverse Installation () -bagit.sourceOrganizationAddress= -bagit.sourceOrganizationEmail= - bagit.checksum.validation.error=Invalid checksum for file "{0}". Manifest checksum={2}, calculated checksum={3}, type={1} bagit.checksum.validation.exception=Error while calculating checksum for file "{0}". Checksum type={1}, error={2} bagit.validation.bag.file.not.found=Invalid BagIt package: "{0}" @@ -2602,6 +2641,7 @@ pid.allowedCharacters=^[A-Za-z0-9._/:\\-]* command.exception.only.superusers={1} can only be called by superusers. command.exception.user.deactivated={0} failed: User account has been deactivated. command.exception.user.deleted={0} failed: User account has been deleted. +command.exception.user.ratelimited={0} failed: Rate limited due to too many requests. #Admin-API admin.api.auth.mustBeSuperUser=Forbidden. You must be a superuser. @@ -2621,7 +2661,10 @@ admin.api.deleteUser.success=Authenticated User {0} deleted. #Files.java files.api.metadata.update.duplicateFile=Filename already exists at {0} files.api.no.draft=No draft available for this file +files.api.no.draftOrUnauth=Dataset version cannot be found or unauthorized. +files.api.notFoundInVersion="File metadata for file with id {0} in dataset version {1} not found" files.api.only.tabular.supported=This operation is only available for tabular files. +files.api.fileNotFound=File could not be found. #Datasets.java datasets.api.updatePIDMetadata.failure.dataset.must.be.released=Modify Registration Metadata must be run on a published dataset. 
@@ -2638,6 +2681,7 @@ datasets.api.datasize.ioerror=Fatal IO error while trying to determine the total datasets.api.grant.role.not.found.error=Cannot find role named ''{0}'' in dataverse {1} datasets.api.grant.role.cant.create.assignment.error=Cannot create assignment: {0} datasets.api.grant.role.assignee.not.found.error=Assignee not found +datasets.api.grant.role.assignee.has.role.error=User already has this role for this dataset datasets.api.revoke.role.not.found.error="Role assignment {0} not found" datasets.api.revoke.role.success=Role {0} revoked for assignee {1} in {2} datasets.api.privateurl.error.datasetnotfound=Could not find dataset. @@ -2651,6 +2695,21 @@ datasets.api.version.files.invalid.order.criteria=Invalid order criteria: {0} datasets.api.version.files.invalid.access.status=Invalid access status: {0} datasets.api.deaccessionDataset.invalid.version.identifier.error=Only {0} or a specific version can be deaccessioned datasets.api.deaccessionDataset.invalid.forward.url=Invalid deaccession forward URL: {0} +datasets.api.globusdownloaddisabled=File transfer from Dataverse via Globus is not available for this dataset. +datasets.api.globusdownloadnotfound=List of files to transfer not found. +datasets.api.globusuploaddisabled=File transfer to Dataverse via Globus is not available for this dataset. +datasets.api.pidgenerator.notfound=No PID Generator configured for the given id. +datasets.api.thumbnail.fileToLarge=File is larger than maximum size: {0} +datasets.api.thumbnail.nonDatasetFailed=In setNonDatasetFileAsThumbnail could not generate thumbnail from uploaded file. +datasets.api.thumbnail.notDeleted=User wanted to remove the thumbnail but it still has one! +datasets.api.thumbnail.actionNotSupported=Whatever you are trying to do to the dataset thumbnail is not supported. +datasets.api.thumbnail.nonDatasetsFileIsNull=In setNonDatasetFileAsThumbnail uploadedFile was null. +datasets.api.thumbnail.inputStreamToFile.exception=In setNonDatasetFileAsThumbnail caught exception calling inputStreamToFile: {0} +datasets.api.thumbnail.missing=Dataset thumbnail is unexpectedly absent. +datasets.api.thumbnail.basedOnWrongFileId=Dataset thumbnail should be based on file id {0} but instead it is {1} +datasets.api.thumbnail.fileNotFound=Could not find file based on id supplied: {0} +datasets.api.thumbnail.fileNotSupplied=A file was not selected to be the new dataset thumbnail. +datasets.api.thumbnail.noChange=No changes to save. #Dataverses.java dataverses.api.update.default.contributor.role.failure.role.not.found=Role {0} not found. @@ -2663,6 +2722,7 @@ dataverses.api.move.dataverse.failure.not.published=Published dataverse may not dataverses.api.move.dataverse.error.guestbook=Dataset guestbook is not in target dataverse. dataverses.api.move.dataverse.error.template=Dataverse template is not in target dataverse. dataverses.api.move.dataverse.error.featured=Dataverse is featured in current dataverse. +dataverses.api.delete.featured.collections.successful=Featured dataverses have been removed. dataverses.api.move.dataverse.error.metadataBlock=Dataverse metadata block is not in target dataverse. dataverses.api.move.dataverse.error.dataverseLink=Dataverse is linked to target dataverse or one of its parents. dataverses.api.move.dataverse.error.datasetLink=Dataset is linked to target dataverse or one of its parents.
@@ -2670,6 +2730,9 @@ dataverses.api.move.dataverse.error.forceMove=Please use the parameter ?forceMov dataverses.api.create.dataset.error.mustIncludeVersion=Please provide initial version in the dataset json dataverses.api.create.dataset.error.superuserFiles=Only a superuser may add files via this api dataverses.api.create.dataset.error.mustIncludeAuthorName=Please provide author name in the dataset json +dataverses.api.validate.json.succeeded=The Dataset JSON provided is valid for this Dataverse Collection. +dataverses.api.validate.json.failed=The Dataset JSON provided failed validation with the following error: +dataverses.api.validate.json.exception=Validation failed with the following exception: #Access.java access.api.allowRequests.failure.noDataset=Could not find Dataset with id: {0} @@ -2682,6 +2745,8 @@ access.api.fileAccess.failure.noUser=Could not find user to execute command: {0} access.api.requestAccess.failure.commandError=Problem trying request access on {0} : {1} access.api.requestAccess.failure.requestExists=An access request for this file on your behalf already exists. access.api.requestAccess.failure.invalidRequest=You may not request access to this file. It may already be available to you. +access.api.requestAccess.failure.retentionExpired=You may not request access to this file. It is not available because its retention period has ended. + access.api.requestAccess.noKey=You must provide a key to request access to a file. access.api.requestAccess.fileNotFound=Could not find datafile with id {0}. access.api.requestAccess.invalidRequest=This file is already available to you for download or you have a pending request @@ -2821,7 +2886,22 @@ passwdVal.passwdReq.lowercase=lowercase passwdVal.passwdReq.letter=letter passwdVal.passwdReq.numeral=numeral passwdVal.passwdReq.special=special +#mydata API (DataRetrieverAPI.java and MyDataFinder.java) dataretrieverAPI.noMsgResultsFound=Sorry, no results were found. +dataretrieverAPI.authentication.required=Requires authentication. Please log in. +dataretrieverAPI.authentication.required.opt=retrieveMyDataAsJsonString. User not found! Shouldn't be using this anyway. +dataretrieverAPI.user.not.found=No user found for: "{0}" +dataretrieverAPI.solr.error=Sorry! There was an error with the search service. +dataretrieverAPI.solr.error.opt=Sorry! There was a Solr Error. +myDataFilterParams.error.no.user=Sorry! No user was found! +myDataFilterParams.error.result.no.role=No results. Please select at least one Role. +myDataFilterParams.error.result.no.dvobject=No results. Please select one of Dataverses, Datasets, Files. +myDataFilterParams.error.result.no.publicationStatus=No results. Please select one of {0}. +myDataFinder.error.result.null=Sorry, the authenticated user ID could not be retrieved. +myDataFinder.error.result.no.role=Sorry, you have no assigned roles. +myDataFinder.error.result.role.empty=Sorry, nothing was found for this role: {0} +myDataFinder.error.result.roles.empty=Sorry, nothing was found for these roles: {0} +myDataFinder.error.result.no.dvobject=Sorry, you have no assigned Dataverses, Datasets, or Files. #xlsxfilereader.java xlsxfilereader.ioexception.parse=Could not parse Excel/XLSX spreadsheet.
{0} @@ -2878,6 +2958,7 @@ Public=Public Restricted=Restricted EmbargoedThenPublic=Embargoed then Public EmbargoedThenRestricted=Embargoed then Restricted +RetentionPeriodExpired=Retention Period Expired #metadata source - Facet Label Harvested=Harvested @@ -2930,3 +3011,8 @@ publishDatasetCommand.pidNotReserved=Cannot publish dataset because its persiste api.errors.invalidApiToken=Invalid API token. api.ldninbox.citation.alert={0},

    The {1} has just been notified that the {2}, {3}, cites "{6}" in this repository. api.ldninbox.citation.subject={0}: A Dataset Citation has been reported! + +#Info.java +openapi.exception.invalid.format=Invalid format {0}, currently supported formats are YAML and JSON. +openapi.exception=Supported format definition not found. +openapi.exception.unaligned=Unaligned parameters on Headers [{0}] and Request [{1}] diff --git a/src/main/java/propertyFiles/License.properties b/src/main/java/propertyFiles/License.properties index 6ded8c41d5b..6602f408a12 100644 --- a/src/main/java/propertyFiles/License.properties +++ b/src/main/java/propertyFiles/License.properties @@ -2,3 +2,5 @@ license.cc0_1.0.description=Creative Commons CC0 1.0 Universal Public Domain Ded license.cc_by_4.0.description=Creative Commons Attribution 4.0 International License. license.cc0_1.0.name=CC0 1.0 license.cc_by_4.0.name=CC BY 4.0 +license.custom_terms.name=Custom Terms + diff --git a/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties b/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties index 70b0c4e371e..5c1a22bfd5f 100644 --- a/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties +++ b/src/main/java/propertyFiles/MimeTypeDetectionByFileName.properties @@ -2,3 +2,5 @@ Makefile=text/x-makefile Snakemake=text/x-snakemake Dockerfile=application/x-docker-file Vagrantfile=application/x-vagrant-file +ro-crate-metadata.json=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate" +ro-crate-metadata.jsonld=application/ld+json; profile="http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate" diff --git a/src/main/java/propertyFiles/MimeTypeDisplay.properties b/src/main/java/propertyFiles/MimeTypeDisplay.properties index 295ac226fa1..8e5a251abbf 100644 --- a/src/main/java/propertyFiles/MimeTypeDisplay.properties +++ b/src/main/java/propertyFiles/MimeTypeDisplay.properties @@ -207,6 +207,7 @@ audio/ogg=OGG Audio audio/wav=Waveform Audio audio/x-wav=Waveform Audio audio/x-wave=Waveform Audio +audio/vnd.wave=Waveform Audio # Video video/avi=AVI Video video/x-msvideo=AVI Video @@ -222,5 +223,6 @@ text/xml-graphml=GraphML Network Data application/octet-stream=Unknown application/x-docker-file=Docker Image File application/x-vagrant-file=Vagrant Image File +application/ld+json;\u0020profile\u003d\u0022http\u003a//www.w3.org/ns/json-ld#flattened\u0020http\u003a//www.w3.org/ns/json-ld#compacted\u0020https\u003a//w3id.org/ro/crate\u0022=RO-Crate metadata # Dataverse-specific application/vnd.dataverse.file-package=Dataverse Package diff --git a/src/main/java/propertyFiles/MimeTypeFacets.properties b/src/main/java/propertyFiles/MimeTypeFacets.properties index aaab66f20ae..0dad8daff4c 100644 --- a/src/main/java/propertyFiles/MimeTypeFacets.properties +++ b/src/main/java/propertyFiles/MimeTypeFacets.properties @@ -209,6 +209,7 @@ audio/ogg=Audio audio/wav=Audio audio/x-wav=Audio audio/x-wave=Audio +audio/vnd.wave=Audio # (anything else that looks like audio/* will also be indexed as facet type "Audio") # Video video/avi=Video @@ -224,5 +225,6 @@ video/webm=Video text/xml-graphml=Network Data # Other application/octet-stream=Unknown +application/ld+json;\u0020profile\u003d\u0022http\u003a//www.w3.org/ns/json-ld#flattened\u0020http\u003a//www.w3.org/ns/json-ld#compacted\u0020https\u003a//w3id.org/ro/crate\u0022=Metadata # Dataverse-specific 
application/vnd.dataverse.file-package=Data diff --git a/src/main/java/propertyFiles/biomedical.properties b/src/main/java/propertyFiles/biomedical.properties index 1bffed2ee03..7392ba823c4 100644 --- a/src/main/java/propertyFiles/biomedical.properties +++ b/src/main/java/propertyFiles/biomedical.properties @@ -96,7 +96,6 @@ controlledvocabulary.studyAssayMeasurementType.targeted_sequencing=targeted sequ controlledvocabulary.studyAssayMeasurementType.transcription_factor_binding_(chip-seq)=transcription factor binding (ChIP-Seq) controlledvocabulary.studyAssayMeasurementType.transcription_factor_binding_site_identification=transcription factor binding site identification controlledvocabulary.studyAssayMeasurementType.transcription_profiling=transcription profiling -controlledvocabulary.studyAssayMeasurementType.transcription_profiling=transcription profiling controlledvocabulary.studyAssayMeasurementType.transcription_profiling_(microarray)=transcription profiling (Microarray) controlledvocabulary.studyAssayMeasurementType.transcription_profiling_(rna-seq)=transcription profiling (RNA-Seq) controlledvocabulary.studyAssayMeasurementType.trap_translational_profiling=TRAP translational profiling diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index f35ede79b50..cb864eb78e9 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -22,6 +22,7 @@ datasetfieldtype.dsDescriptionValue.title=Text datasetfieldtype.dsDescriptionDate.title=Date datasetfieldtype.subject.title=Subject datasetfieldtype.keyword.title=Keyword +datasetfieldtype.keywordTermURI.title=Term URI datasetfieldtype.keywordValue.title=Term datasetfieldtype.keywordVocabulary.title=Controlled Vocabulary Name datasetfieldtype.keywordVocabularyURI.title=Controlled Vocabulary URL @@ -100,6 +101,7 @@ datasetfieldtype.dsDescriptionValue.description=A summary describing the purpose datasetfieldtype.dsDescriptionDate.description=The date when the description was added to the Dataset. If the Dataset contains more than one description, e.g. the data producer supplied one description and the data repository supplied another, this date is used to distinguish between the descriptions datasetfieldtype.subject.description=The area of study relevant to the Dataset datasetfieldtype.keyword.description=A key term that describes an important aspect of the Dataset and information about any controlled vocabulary used +datasetfieldtype.keywordTermURI.description=A URI that points to the web presence of the Keyword Term datasetfieldtype.keywordValue.description=A key term that describes important aspects of the Dataset datasetfieldtype.keywordVocabulary.description=The controlled vocabulary used for the keyword term (e.g. 
LCSH, MeSH) datasetfieldtype.keywordVocabularyURI.description=The URL where one can access information about the term's controlled vocabulary @@ -178,6 +180,7 @@ datasetfieldtype.dsDescriptionValue.watermark= datasetfieldtype.dsDescriptionDate.watermark=YYYY-MM-DD datasetfieldtype.subject.watermark= datasetfieldtype.keyword.watermark= +datasetfieldtype.keywordTermURI.watermark=https:// datasetfieldtype.keywordValue.watermark= datasetfieldtype.keywordVocabulary.watermark= datasetfieldtype.keywordVocabularyURI.watermark=https:// diff --git a/src/main/java/propertyFiles/customGSD.properties b/src/main/java/propertyFiles/customGSD.properties index 40dc0328053..2375596fe2f 100644 --- a/src/main/java/propertyFiles/customGSD.properties +++ b/src/main/java/propertyFiles/customGSD.properties @@ -161,7 +161,6 @@ controlledvocabulary.gsdFacultyName.mcloskey,_karen=MCloskey, Karen controlledvocabulary.gsdFacultyName.mehrotra,_rahul=Mehrotra, Rahul controlledvocabulary.gsdFacultyName.menchaca,_alejandra=Menchaca, Alejandra controlledvocabulary.gsdFacultyName.menges,_achim=Menges, Achim -controlledvocabulary.gsdFacultyName.menges,_achim=Menges, Achim controlledvocabulary.gsdFacultyName.michalatos,_panagiotis=Michalatos, Panagiotis controlledvocabulary.gsdFacultyName.moe,_kiel=Moe, Kiel controlledvocabulary.gsdFacultyName.molinsky,_jennifer=Molinsky, Jennifer @@ -507,7 +506,6 @@ controlledvocabulary.gsdCourseName.06323:_brownfields_practicum=06323: Brownfiel controlledvocabulary.gsdCourseName.06333:_aquatic_ecology=06333: Aquatic Ecology controlledvocabulary.gsdCourseName.06335:_phytotechnologies=06335: Phytotechnologies controlledvocabulary.gsdCourseName.06337:_changing_natural_and_built_coastal_environments=06337: Changing Natural and Built Coastal Environments -controlledvocabulary.gsdCourseName.06337:_changing_natural_and_built_coastal_environments=06337: Changing Natural and Built Coastal Environments controlledvocabulary.gsdCourseName.06338:_introduction_to_computational_design=06338: Introduction to Computational Design controlledvocabulary.gsdCourseName.06436:_expanded_mechanisms_/_empirical_materialisms=06436: Expanded Mechanisms / Empirical Materialisms controlledvocabulary.gsdCourseName.06450:_high_performance_buildings_and_systems_integration=06450: High Performance Buildings and Systems Integration diff --git a/src/main/java/propertyFiles/geospatial.properties b/src/main/java/propertyFiles/geospatial.properties index 04db8d3d05f..2659c2a3cc9 100644 --- a/src/main/java/propertyFiles/geospatial.properties +++ b/src/main/java/propertyFiles/geospatial.properties @@ -8,10 +8,10 @@ datasetfieldtype.city.title=City datasetfieldtype.otherGeographicCoverage.title=Other datasetfieldtype.geographicUnit.title=Geographic Unit datasetfieldtype.geographicBoundingBox.title=Geographic Bounding Box -datasetfieldtype.westLongitude.title=West Longitude -datasetfieldtype.eastLongitude.title=East Longitude -datasetfieldtype.northLongitude.title=North Latitude -datasetfieldtype.southLongitude.title=South Latitude +datasetfieldtype.westLongitude.title=Westernmost (Left) Longitude +datasetfieldtype.eastLongitude.title=Easternmost (Right) Longitude +datasetfieldtype.northLatitude.title=Northernmost (Top) Latitude +datasetfieldtype.southLatitude.title=Southernmost (Bottom) Latitude datasetfieldtype.geographicCoverage.description=Information on the geographic coverage of the data. Includes the total geographic scope of the data. datasetfieldtype.country.description=The country or nation that the Dataset is about. 
datasetfieldtype.state.description=The state or province that the Dataset is about. Use GeoNames for correct spelling and avoid abbreviations. @@ -19,10 +19,10 @@ datasetfieldtype.city.description=The name of the city that the Dataset is about datasetfieldtype.otherGeographicCoverage.description=Other information on the geographic coverage of the data. datasetfieldtype.geographicUnit.description=Lowest level of geographic aggregation covered by the Dataset, e.g., village, county, region. datasetfieldtype.geographicBoundingBox.description=The fundamental geometric description for any Dataset that models geography is the geographic bounding box. It describes the minimum box, defined by west and east longitudes and north and south latitudes, which includes the largest geographic extent of the Dataset's geographic coverage. This element is used in the first pass of a coordinate-based search. Inclusion of this element in the codebook is recommended, but is required if the bound polygon box is included. -datasetfieldtype.westLongitude.description=Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= West Bounding Longitude Value <= 180,0. -datasetfieldtype.eastLongitude.description=Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180,0 <= East Bounding Longitude Value <= 180,0. -datasetfieldtype.northLongitude.description=Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= North Bounding Latitude Value <= 90,0. -datasetfieldtype.southLongitude.description=Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90,0 <= South Bounding Latitude Value <= 90,0. +datasetfieldtype.westLongitude.description=Westernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180.0 <= West Bounding Longitude Value <= 180.0. +datasetfieldtype.eastLongitude.description=Easternmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -180.0 <= East Bounding Longitude Value <= 180.0. +datasetfieldtype.northLatitude.description=Northernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90.0 <= North Bounding Latitude Value <= 90.0. +datasetfieldtype.southLatitude.description=Southernmost coordinate delimiting the geographic extent of the Dataset. A valid range of values, expressed in decimal degrees, is -90.0 <= South Bounding Latitude Value <= 90.0. 
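+# Illustrative values (assumed, not shipped with the bundle): a bounding box roughly
+# covering the contiguous United States would satisfy the ranges documented above with
+# westLongitude=-125.0, eastLongitude=-66.9, northLatitude=49.4, southLatitude=24.4.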
datasetfieldtype.geographicCoverage.watermark= datasetfieldtype.country.watermark= datasetfieldtype.state.watermark= @@ -32,8 +32,8 @@ datasetfieldtype.geographicUnit.watermark= datasetfieldtype.geographicBoundingBox.watermark= datasetfieldtype.westLongitude.watermark= datasetfieldtype.eastLongitude.watermark= -datasetfieldtype.northLongitude.watermark= -datasetfieldtype.southLongitude.watermark= +datasetfieldtype.northLatitude.watermark= +datasetfieldtype.southLatitude.watermark= controlledvocabulary.country.afghanistan=Afghanistan controlledvocabulary.country.albania=Albania controlledvocabulary.country.algeria=Algeria @@ -89,10 +89,10 @@ controlledvocabulary.country.cook_islands=Cook Islands controlledvocabulary.country.costa_rica=Costa Rica controlledvocabulary.country.croatia=Croatia controlledvocabulary.country.cuba=Cuba -controlledvocabulary.country.curacao=Cura�ao +controlledvocabulary.country.curacao=Cura\u00e7ao controlledvocabulary.country.cyprus=Cyprus controlledvocabulary.country.czech_republic=Czech Republic -controlledvocabulary.country.cote_d'ivoire=C�te d'Ivoire +controlledvocabulary.country.cote_d'ivoire=C\u00f4te d'Ivoire controlledvocabulary.country.denmark=Denmark controlledvocabulary.country.djibouti=Djibouti controlledvocabulary.country.dominica=Dominica @@ -216,8 +216,8 @@ controlledvocabulary.country.qatar=Qatar controlledvocabulary.country.romania=Romania controlledvocabulary.country.russian_federation=Russian Federation controlledvocabulary.country.rwanda=Rwanda -controlledvocabulary.country.reunion=R�union -controlledvocabulary.country.saint_barthelemy=Saint Barth�lemy +controlledvocabulary.country.reunion=R\u00e9union +controlledvocabulary.country.saint_barthelemy=Saint Barth\u00e9lemy controlledvocabulary.country.saint_helena,_ascension_and_tristan_da_cunha=Saint Helena, Ascension and Tristan da Cunha controlledvocabulary.country.saint_kitts_and_nevis=Saint Kitts and Nevis controlledvocabulary.country.saint_lucia=Saint Lucia @@ -282,4 +282,4 @@ controlledvocabulary.country.western_sahara=Western Sahara controlledvocabulary.country.yemen=Yemen controlledvocabulary.country.zambia=Zambia controlledvocabulary.country.zimbabwe=Zimbabwe -controlledvocabulary.country.aland_islands=�land Islands +controlledvocabulary.country.aland_islands=\u00c5land Islands diff --git a/src/main/java/propertyFiles/staticSearchFields.properties b/src/main/java/propertyFiles/staticSearchFields.properties index ab03de64f23..53d0080b87c 100644 --- a/src/main/java/propertyFiles/staticSearchFields.properties +++ b/src/main/java/propertyFiles/staticSearchFields.properties @@ -3,6 +3,7 @@ staticSearchFields.metadata_type_ss=Dataset Feature staticSearchFields.dvCategory=Dataverse Category staticSearchFields.metadataSource=Metadata Source staticSearchFields.publicationDate=Publication Year +staticSearchFields.license=License staticSearchFields.fileTypeGroupFacet=File Type staticSearchFields.dvObjectType=Type staticSearchFields.fileTag=File Tag diff --git a/src/main/resources/META-INF/javamail.default.address.map b/src/main/resources/META-INF/javamail.default.address.map new file mode 100644 index 00000000000..b1115c9dc8c --- /dev/null +++ b/src/main/resources/META-INF/javamail.default.address.map @@ -0,0 +1,2 @@ +# See https://jakartaee.github.io/mail-api/docs/api/jakarta.mail/jakarta/mail/Session.html +rfc822=smtp diff --git a/src/main/resources/META-INF/microprofile-config.properties b/src/main/resources/META-INF/microprofile-config.properties index 504b5e46735..b0bc92cf975 100644 --- 
a/src/main/resources/META-INF/microprofile-config.properties +++ b/src/main/resources/META-INF/microprofile-config.properties @@ -18,6 +18,7 @@ dataverse.build= dataverse.files.directory=${STORAGE_DIR:/tmp/dataverse} dataverse.files.uploads=${STORAGE_DIR:${com.sun.aas.instanceRoot}}/uploads dataverse.files.docroot=${STORAGE_DIR:${com.sun.aas.instanceRoot}}/docroot +dataverse.files.globus-cache-maxage=5 # SEARCH INDEX dataverse.solr.host=localhost @@ -41,6 +42,11 @@ dataverse.rserve.user=rserve dataverse.rserve.password=rserve dataverse.rserve.tempdir=/tmp/Rserv +# MAIL +dataverse.mail.debug=false +dataverse.mail.mta.auth=false +dataverse.mail.mta.allow-utf8-addresses=true + # OAI SERVER dataverse.oai.server.maxidentifiers=100 dataverse.oai.server.maxrecords=10 @@ -49,17 +55,6 @@ dataverse.oai.server.maxsets=100 # can be customized via the setting below: #dataverse.oai.server.repositoryname= -# PERSISTENT IDENTIFIER PROVIDERS -# EZID -dataverse.pid.ezid.api-url=https://ezid.cdlib.org - -# DataCite -dataverse.pid.datacite.mds-api-url=https://mds.test.datacite.org -dataverse.pid.datacite.rest-api-url=https://api.test.datacite.org - -# Handle.Net -dataverse.pid.handlenet.index=300 - # AUTHENTICATION dataverse.auth.oidc.pkce.max-cache-size=10000 dataverse.auth.oidc.pkce.max-cache-age=300 diff --git a/src/main/resources/db/migration/V6.0.0.3__10095-guestbook-at-request2.sql b/src/main/resources/db/migration/V6.0.0.3__10095-guestbook-at-request2.sql new file mode 100644 index 00000000000..b6157e6a782 --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.3__10095-guestbook-at-request2.sql @@ -0,0 +1,34 @@ +-- This creates a function that ESTIMATES the size of the +-- GuestbookResponse table (for the metrics display), instead +-- of relying on straight "SELECT COUNT(*) ..." +-- It uses statistics to estimate the number of guestbook entries +-- and the fraction of them related to downloads, +-- i.e. those that weren't created for 'AccessRequest' events. +-- Significant potential savings for an active installation. 
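+-- Illustrative usage of the function defined below (a hypothetical psql session;
+-- the estimate is statistics-based, so it will only approximate an exact count):
+--   SELECT estimateGuestBookResponseTableSize() AS estimated,
+--          (SELECT COUNT(*) FROM guestbookresponse
+--            WHERE eventtype != 'AccessRequest') AS exact;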
+-- See https://github.com/IQSS/dataverse/issues/8840 and +-- https://github.com/IQSS/dataverse/pull/8972 for more details + +CREATE OR REPLACE FUNCTION estimateGuestBookResponseTableSize() +RETURNS bigint AS $$ +DECLARE + estimatedsize bigint; +BEGIN + SELECT CASE WHEN relpages<10 THEN 0 + ELSE ((reltuples / relpages) + * (pg_relation_size('public.guestbookresponse') / current_setting('block_size')::int))::bigint + * (SELECT CASE WHEN ((select count(*) from pg_stats where tablename='guestbookresponse') = 0 + OR (select array_position(most_common_vals::text::text[], 'AccessRequest') + FROM pg_stats WHERE tablename='guestbookresponse' AND attname='eventtype') IS NULL) THEN 1 + ELSE 1 - (SELECT (most_common_freqs::text::text[])[array_position(most_common_vals::text::text[], 'AccessRequest')]::float + FROM pg_stats WHERE tablename='guestbookresponse' and attname='eventtype') END) + END + FROM pg_class + WHERE oid = 'public.guestbookresponse'::regclass INTO estimatedsize; + + IF estimatedsize = 0 THEN + SELECT COUNT(id) FROM guestbookresponse WHERE eventtype != 'AccessRequest' INTO estimatedsize; + END IF; + + RETURN estimatedsize; +END; +$$ LANGUAGE plpgsql IMMUTABLE; diff --git a/src/main/resources/db/migration/V6.0.0.4__10093-privateurluser_id_update.sql b/src/main/resources/db/migration/V6.0.0.4__10093-privateurluser_id_update.sql new file mode 100644 index 00000000000..260f191f557 --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.4__10093-privateurluser_id_update.sql @@ -0,0 +1 @@ + update roleassignment set assigneeidentifier=replace(assigneeidentifier, '#','!') where assigneeidentifier like '#%'; \ No newline at end of file diff --git a/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql b/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql new file mode 100644 index 00000000000..d6c067056ec --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.5__8549-collection-quotas.sql @@ -0,0 +1,91 @@ +-- The somewhat convoluted queries below populate the storage sizes for the entire +-- DvObject tree, fast. It IS possible to do it all with one recursive PostgreSQL +-- query that will crawl the tree from the leaves (DataFiles) up and add up the +-- sizes for all the Datasets/Collections above. Unfortunately, that appears to take +-- some hours on a database the size of the one at IQSS. So what we are doing +-- instead is to first compute the total sizes of all the *directly* linked objects, +-- with a couple of linear queries. This will correctly calculate the sizes of all the +-- Datasets (since they can only contain DataFiles, without any other hierarchy) and +-- those Collections that only contain Datasets; but not the sizes of Collections that +-- have sub-collections. To take any sub-collections into account we will then run +-- a recursive query - but we only need to run it on the tree of Collections, +-- which makes it reasonably fast on any real-life instance. +-- *Temporarily* add this "tempstoragesize" column to the DvObject table. +-- It will be used to calculate the storage sizes of all the DvObjectContainers +-- (Datasets and Collections), as a matter of convenience. Once calculated, the values +-- will be moved to the permanent StorageUse table. +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS tempStorageSize BIGINT; +-- First we calculate the storage size of each individual dataset (a simple sum +-- of the storage sizes of all the files in the dataset).
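+-- For illustration only, a sketch of the single recursive query mentioned above,
+-- rolling file sizes up from the leaves to every ancestor; it is NOT used here
+-- because that approach proved far too slow on large installations:
+--   WITH RECURSIVE sizes (id, owner_id, filesize) AS (
+--       SELECT o.id, o.owner_id, f.filesize
+--       FROM dvobject o JOIN datafile f ON f.id = o.id
+--     UNION ALL
+--       SELECT o.id, o.owner_id, s.filesize
+--       FROM dvobject o JOIN sizes s ON s.owner_id = o.id
+--   )
+--   SELECT id, SUM(filesize) AS totalsize FROM sizes GROUP BY id;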
+-- For datafiles, the storage size = main file size by default +-- (we are excluding any harvested files and datasets): +UPDATE dvobject SET tempStorageSize=o.combinedStorageSize +FROM (SELECT datasetobject.id, SUM(file.filesize) AS combinedStorageSize +FROM dvobject fileobject, dataset datasetobject, datafile file +WHERE fileobject.owner_id = datasetobject.id +AND fileobject.id = file.id +AND datasetobject.harvestingclient_id IS null +GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; + +-- ... but for ingested tabular files the size of the saved original needs to be added, since +-- those also take space: +-- (should be safe to assume that there are no *harvested ingested* files) +UPDATE dvobject SET tempStorageSize=tempStorageSize+o.combinedStorageSize +FROM (SELECT datasetobject.id, COALESCE(SUM(dt.originalFileSize),0) AS combinedStorageSize +FROM dvobject fileobject, dvobject datasetobject, datafile file, datatable dt +WHERE fileobject.owner_id = datasetobject.id +AND fileobject.id = file.id +AND dt.datafile_id = file.id +GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; + +-- there may also be some auxiliary files registered in the database, such as +-- the content generated and deposited by external tools - diff. privacy stats +-- being one example. These are also considered the "payload" files that +-- we want to count for the purposes of calculating storage use. +UPDATE dvobject SET tempStorageSize=tempStorageSize+o.combinedStorageSize +FROM (SELECT datasetobject.id, COALESCE(SUM(aux.fileSize),0) AS combinedStorageSize +FROM dvobject fileobject, dvobject datasetobject, datafile file, auxiliaryFile aux +WHERE fileobject.owner_id = datasetobject.id +AND fileobject.id = file.id +AND aux.datafile_id = file.id +GROUP BY datasetobject.id) o, dataset ds WHERE o.id = dvobject.id AND dvobject.dtype='Dataset' AND dvobject.id = ds.id AND ds.harvestingclient_id IS null; + + +-- ...
and then we can repeat the same for collections, by setting the storage size +-- to the sum of the storage sizes of the datasets *directly* in each collection: +-- (no attempt is made yet to recursively count the sizes of all the child sub-collections) +UPDATE dvobject SET tempStorageSize=o.combinedStorageSize +FROM (SELECT collectionobject.id, SUM(datasetobject.tempStorageSize) AS combinedStorageSize +FROM dvobject datasetobject, dvobject collectionobject +WHERE datasetobject.owner_id = collectionobject.id +AND datasetobject.tempStorageSize IS NOT null +GROUP BY collectionobject.id) o WHERE o.id = dvobject.id AND dvobject.dtype='Dataverse'; + +-- And now we will update the storage sizes of all the Collection ("Dataverse") objects +-- that contain sub-collections, *recursively*, to add their sizes to the totals: +WITH RECURSIVE treestorage (id, owner_id, tempStorageSize, dtype) AS +( + -- All dataverses: + SELECT id, owner_id, tempStorageSize, dtype + FROM dvobject + WHERE dtype = 'Dataverse' + + UNION ALL + + -- Recursive Member: + SELECT dvobject.id, treestorage.owner_id, dvobject.tempStorageSize, treestorage.dtype + FROM treestorage, dvobject + WHERE treestorage.id = dvobject.owner_id + AND dvobject.dtype = 'Dataverse' +) +UPDATE dvobject SET tempStorageSize=tempStorageSize+(SELECT COALESCE(SUM(tempStorageSize),0) +FROM treestorage WHERE owner_id=dvobject.id) +WHERE dvobject.dtype = 'Dataverse' +AND dvobject.id IN (SELECT owner_id FROM treestorage WHERE owner_id IS NOT null); + +-- And, finally, we can move these calculated storage sizes of datasets and +-- collections to the dedicated new table StorageUse: +INSERT INTO storageuse (dvobjectcontainer_id,sizeinbytes) (SELECT id, tempstoragesize FROM dvobject WHERE dtype = 'Dataverse'); +INSERT INTO storageuse (dvobjectcontainer_id,sizeinbytes) (SELECT d.id, o.tempstoragesize FROM dvobject o, dataset d WHERE o.id = d.id AND d.harvestingclient_id IS NULL); +-- ...
and drop the temporary column we added to DvObject earlier: +ALTER TABLE dvobject DROP column tempStorageSize diff --git a/src/main/resources/db/migration/V6.0.0.6__9506-track-thumb-failures.sql b/src/main/resources/db/migration/V6.0.0.6__9506-track-thumb-failures.sql new file mode 100644 index 00000000000..156960d2011 --- /dev/null +++ b/src/main/resources/db/migration/V6.0.0.6__9506-track-thumb-failures.sql @@ -0,0 +1 @@ +ALTER TABLE dvobject ADD COLUMN IF NOT EXISTS previewimagefail BOOLEAN DEFAULT FALSE; diff --git a/src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql b/src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql new file mode 100644 index 00000000000..8e311c06b32 --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.1__9728-universe-variablemetadata.sql @@ -0,0 +1,2 @@ +-- increase field universe from 255 to text +ALTER TABLE variablemetadata ALTER COLUMN universe TYPE text; diff --git a/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql b/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql new file mode 100644 index 00000000000..7c52a00107a --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.2__8524-store-tabular-files-with-varheaders.sql @@ -0,0 +1 @@ +ALTER TABLE datatable ADD COLUMN IF NOT EXISTS storedWithVariableHeader BOOLEAN DEFAULT FALSE; diff --git a/src/main/resources/db/migration/V6.1.0.3__9983-missing-unique-constraints.sql b/src/main/resources/db/migration/V6.1.0.3__9983-missing-unique-constraints.sql new file mode 100644 index 00000000000..6cb3a455e4e --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.3__9983-missing-unique-constraints.sql @@ -0,0 +1,16 @@ +DO $$ +BEGIN + + BEGIN + ALTER TABLE externalvocabularyvalue ADD CONSTRAINT externalvocabularvalue_uri_key UNIQUE(uri); + EXCEPTION + WHEN duplicate_table THEN RAISE NOTICE 'Table unique constraint externalvocabularvalue_uri_key already exists'; + END; + + BEGIN + ALTER TABLE oaiset ADD CONSTRAINT oaiset_spec_key UNIQUE(spec); + EXCEPTION + WHEN duplicate_table THEN RAISE NOTICE 'Table unique constraint oaiset_spec_key already exists'; + END; + +END $$; \ No newline at end of file diff --git a/src/main/resources/db/migration/V6.1.0.4__5645-geospatial-fieldname-fix.sql b/src/main/resources/db/migration/V6.1.0.4__5645-geospatial-fieldname-fix.sql new file mode 100644 index 00000000000..2ab8cbc802e --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.4__5645-geospatial-fieldname-fix.sql @@ -0,0 +1,7 @@ +UPDATE datasetfieldtype +SET name = 'northLatitude' +WHERE name = 'northLongitude'; + +UPDATE datasetfieldtype +SET name = 'southLatitude' +WHERE name = 'southLongitude'; \ No newline at end of file diff --git a/src/main/resources/db/migration/V6.1.0.5__3623-multiple-pid-providers.sql b/src/main/resources/db/migration/V6.1.0.5__3623-multiple-pid-providers.sql new file mode 100644 index 00000000000..1d11e178abf --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.5__3623-multiple-pid-providers.sql @@ -0,0 +1,2 @@ +ALTER TABLE dataverse ADD COLUMN IF NOT EXISTS pidgeneratorspecs TEXT; +ALTER TABLE dataset ADD COLUMN IF NOT EXISTS pidgeneratorspecs TEXT; diff --git a/src/main/resources/db/migration/V6.1.0.6.sql b/src/main/resources/db/migration/V6.1.0.6.sql new file mode 100644 index 00000000000..c9942fb8480 --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.6.sql @@ -0,0 +1,2 @@ +-- Add flag to allow harvesting client to handle missing CVV values +ALTER 
TABLE harvestingclient ADD COLUMN IF NOT EXISTS allowharvestingmissingcvv BOOLEAN; diff --git a/src/main/resources/db/migration/V6.1.0.7.sql b/src/main/resources/db/migration/V6.1.0.7.sql new file mode 100644 index 00000000000..470483e2bf4 --- /dev/null +++ b/src/main/resources/db/migration/V6.1.0.7.sql @@ -0,0 +1 @@ +ALTER TABLE authenticateduser ADD COLUMN IF NOT EXISTS ratelimittier int DEFAULT 1; diff --git a/src/main/resources/db/migration/V6.2.0.1.sql b/src/main/resources/db/migration/V6.2.0.1.sql new file mode 100644 index 00000000000..cb23d589542 --- /dev/null +++ b/src/main/resources/db/migration/V6.2.0.1.sql @@ -0,0 +1 @@ +ALTER TABLE datafile ADD COLUMN IF NOT EXISTS retention_id BIGINT; \ No newline at end of file diff --git a/src/main/resources/edu/harvard/iq/dataverse/datacite_metadata_template.xml b/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml similarity index 100% rename from src/main/resources/edu/harvard/iq/dataverse/datacite_metadata_template.xml rename to src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml diff --git a/src/main/webapp/WEB-INF/pretty-config.xml b/src/main/webapp/WEB-INF/pretty-config.xml index ab5f37a1051..5f8f4877af8 100644 --- a/src/main/webapp/WEB-INF/pretty-config.xml +++ b/src/main/webapp/WEB-INF/pretty-config.xml @@ -27,4 +27,9 @@ + + + + + \ No newline at end of file diff --git a/src/main/webapp/contactFormFragment.xhtml b/src/main/webapp/contactFormFragment.xhtml index cb4eb3d0872..470a137e6cf 100644 --- a/src/main/webapp/contactFormFragment.xhtml +++ b/src/main/webapp/contactFormFragment.xhtml @@ -37,7 +37,7 @@
    + validator="#{sendFeedbackDialog.validateUserEmail}" validatorMessage="#{bundle['contact.from.invalid']}" requiredMessage="#{bundle['contact.from.required']}" required="#{param['DO_VALIDATION']}" placeholder="#{bundle['contact.from.emailPlaceholder']}"/>
    @@ -81,7 +81,7 @@
    + update="@form,messagePanel" oncomplete="if (args && !args.validationFailed) PF('contactForm').hide();" actionListener="#{sendFeedbackDialog.sendMessage}">
    diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index c54d94442ea..6d5b0a5fe4f 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -482,7 +482,7 @@
    - #{bundle['file.dataFilesTab.terms.list.guestbook']}   + #{bundle['file.dataFilesTab.terms.list.guestbook']}  
    @@ -524,7 +524,7 @@ + update=":datasetForm:previewGuestbook" oncomplete="PF('viewGuestbook').show();"/>
    @@ -563,7 +563,7 @@ + update=":datasetForm:previewGuestbook" oncomplete="PF('viewGuestbook').show();"/> diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 2f76197e508..936d354e9d7 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -32,7 +32,7 @@ and !permissionsWrapper.canIssuePublishDatasetCommand(DatasetPage.dataset)}"/> - + @@ -178,7 +186,7 @@
  • + oncomplete="showPopup(false);"> #{bundle.download} @@ -192,7 +200,7 @@
  • #{bundle.downloadOriginal} @@ -208,7 +216,7 @@
  • - #{bundle.downloadArchival} @@ -228,15 +236,21 @@ - -
  • - - - -
  • - - + +
  • + + #{bundle.transfer} + + + + + +
  • +
    + + @@ -580,7 +594,7 @@
    -
    +
    @@ -991,8 +1005,10 @@ - + + +

    #{bundle['dataset.share.datasetShare.tip']}

    @@ -1041,10 +1057,10 @@
    - +

    #{bundle['dataset.noValidSelectedFilesForDownload']}

    -

    #{DatasetPage.cantDownloadDueToEmbargo ? bundle['dataset.requestAccessToRestrictedFilesWithEmbargo'] : bundle['dataset.requestAccessToRestrictedFiles']}

    +

    #{DatasetPage.cantDownloadDueToEmbargoOrDVAccess ? bundle['dataset.requestAccessToRestrictedFilesWithEmbargo'] : bundle['dataset.requestAccessToRestrictedFiles']}

    + +

    #{bundle['dataset.noValidSelectedFilesForTransfer']}

    + +

    #{DatasetPage.cantDownloadDueToEmbargoOrDVAccess ? bundle['dataset.requestAccessToRestrictedFilesWithEmbargo'] : bundle['dataset.requestAccessToRestrictedFiles']}

    +
    +
    + +
    +

    #{bundle['file.zip.download.exceeds.limit.info']}

    @@ -1073,8 +1100,8 @@
    - -

    #{DatasetPage.cantDownloadDueToEmbargo ? bundle['dataset.mixedSelectedFilesForDownloadWithEmbargo'] : bundle['dataset.mixedSelectedFilesForDownload']}

    + +

    #{DatasetPage.cantDownloadDueToEmbargoOrDVAccess ? bundle['dataset.mixedSelectedFilesForDownloadWithEmbargo'] : bundle['dataset.mixedSelectedFilesForDownload']}

    @@ -1095,6 +1122,28 @@ + +

    #{bundle['dataset.mixedSelectedFilesForTransfer']}

    +
    + + + + + +
    #{resFile.label}
    +
    +

    #{bundle['dataset.transferUnrestricted']}

    + + + +
    +

    #{bundle['file.deleteDialog.tip']}

    @@ -1194,7 +1243,12 @@ -

    #{bundle['file.deaccessionDialog.tip']}

    +
    +   +


    + +

    +
    + @@ -1807,30 +1862,37 @@
    + +

    - #{bundle['dataset.rejectMessage']} + #{bundle['dataset.rejectMessage']} #{disableReasonField ? '':bundle['dataset.rejectMessageReason']}

    - - - + -

    - -
    + + + +
    - + + +
    - + @@ -1911,10 +1973,14 @@ $('button[id$="updateOwnerDataverse"]').trigger('click'); } - function showPopup() { + function showPopup(isTransfer) { var outcome = document.getElementById("datasetForm:validateFilesOutcome").value; if (outcome ==='Mixed'){ - PF('downloadMixed').show(); + if(isTransfer) { + PF('globusTransferMixed').show(); + } else { + PF('downloadMixed').show(); + } } if (outcome ==='FailEmpty'){ PF('selectFilesForDownload').show(); @@ -1923,7 +1989,11 @@ PF('downloadTooLarge').show(); } if (outcome ==='FailRestricted'){ - PF('downloadInvalid').show(); + if(isTransfer) { + PF('transferInvalid').show(); + } else { + PF('downloadInvalid').show(); + } } if (outcome ==='GuestbookRequired'){ PF('guestbookAndTermsPopup').show(); diff --git a/src/main/webapp/datasetFieldForEditFragment.xhtml b/src/main/webapp/datasetFieldForEditFragment.xhtml index e72ee351ea0..d8c005366cb 100644 --- a/src/main/webapp/datasetFieldForEditFragment.xhtml +++ b/src/main/webapp/datasetFieldForEditFragment.xhtml @@ -42,6 +42,7 @@ + diff --git a/src/main/webapp/dataverse.xhtml b/src/main/webapp/dataverse.xhtml index 41e2807c4fd..7f70f28e194 100644 --- a/src/main/webapp/dataverse.xhtml +++ b/src/main/webapp/dataverse.xhtml @@ -283,6 +283,19 @@
    +
    + + #{bundle.pidProviderOption} + + +
    + + + + +
    +
    diff --git a/src/main/webapp/dataverseuser.xhtml b/src/main/webapp/dataverseuser.xhtml index 2426cf980d3..9ed8b5209b6 100644 --- a/src/main/webapp/dataverseuser.xhtml +++ b/src/main/webapp/dataverseuser.xhtml @@ -178,9 +178,6 @@ #{DataverseUserPage.getRequestorEmail(item)} - - #{DataverseUserPage.getReasonForReturn(item.theObject)} - diff --git a/src/main/webapp/editFilesFragment.xhtml b/src/main/webapp/editFilesFragment.xhtml index fff047f494f..6fab335c0f3 100644 --- a/src/main/webapp/editFilesFragment.xhtml +++ b/src/main/webapp/editFilesFragment.xhtml @@ -377,13 +377,13 @@
    - - + #{fileMetadata.label} diff --git a/src/main/webapp/editdatafiles.xhtml b/src/main/webapp/editdatafiles.xhtml index 02acb224827..be78359e02b 100644 --- a/src/main/webapp/editdatafiles.xhtml +++ b/src/main/webapp/editdatafiles.xhtml @@ -75,8 +75,10 @@ - + + + diff --git a/src/main/webapp/file-download-button-fragment.xhtml b/src/main/webapp/file-download-button-fragment.xhtml index 8ef2af40431..cd6a6b06523 100644 --- a/src/main/webapp/file-download-button-fragment.xhtml +++ b/src/main/webapp/file-download-button-fragment.xhtml @@ -14,16 +14,18 @@
  • - + value=" #{dataFileServiceBean.isRetentionExpired(fileMetadata) ? bundle['retentionExpired'] : !fileDownloadHelper.isRestrictedOrEmbargoed(fileMetadata) ? bundle['public'] : (!fileDownloadHelper.canDownloadFile(fileMetadata) ? (!dataFileServiceBean.isActivelyEmbargoed(fileMetadata) ? bundle['restricted'] : bundle['embargoed']) : (!dataFileServiceBean.isActivelyEmbargoed(fileMetadata) ? bundle['restrictedaccess'] : bundle['embargoed']) )}" + styleClass="#{dataFileServiceBean.isRetentionExpired(fileMetadata) ? 'text-danger' : !fileDownloadHelper.isRestrictedOrEmbargoed(fileMetadata) ? 'text-success' : (!fileDownloadHelper.canDownloadFile(fileMetadata) ? 'text-danger' : 'text-success')}"/>
  • + and fileMetadata.dataFile.owner.fileAccessRequest + and !dataFileServiceBean.isActivelyEmbargoed(fileMetadata) + and !dataFileServiceBean.isRetentionExpired(fileMetadata)}"> -
  • +
  • - GT: #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType} + #{bundle['file.globus.of']} #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType}
  • @@ -112,7 +114,7 @@ #{fileMetadata.dataFile.friendlyType == 'Unknown' ? bundle['file.download.filetype.unknown'] : fileMetadata.dataFile.friendlyType}
    + + + +
  • + + + +
  • +
    + +
  • + + + +
  • +
    @@ -102,6 +118,18 @@ + +
  • + + + + +
  • +
    +
  • diff --git a/src/main/webapp/file-edit-popup-fragment.xhtml b/src/main/webapp/file-edit-popup-fragment.xhtml index ffc4a1fcef7..3b1141816c8 100644 --- a/src/main/webapp/file-edit-popup-fragment.xhtml +++ b/src/main/webapp/file-edit-popup-fragment.xhtml @@ -168,7 +168,83 @@ PF('blockDatasetForm').hide();" action="#{bean.clearEmbargoPopup()}" update="#{updateElements}" immediate="true"/>
  • - + + + + +

    #{bundle['file.editRetentionDialog.tip']}

    +

    #{bundle['file.editRetentionDialog.some.tip']} #{bundle['file.editRetentionDialog.partial.tip']}

    +

    #{bundle['file.editRetentionDialog.none.tip']}

    + + +
    + +
    +
    + +
    +
    +
    + + + + +
    + +
    +
    +
    +

    #{bundle['file.editRetentionDialog.reason.tip']}

    + +
    +
    +
    +
    +
    +
    +
    + +
    +
    + + + +
    +
    +
    +
    +
    + + +
    +
    +

    #{bundle['file.deleteFileDialog.immediate']}

    diff --git a/src/main/webapp/file-info-fragment.xhtml b/src/main/webapp/file-info-fragment.xhtml index ca82738f920..dca5c4a8cec 100644 --- a/src/main/webapp/file-info-fragment.xhtml +++ b/src/main/webapp/file-info-fragment.xhtml @@ -28,8 +28,8 @@
    - - + @@ -64,6 +64,7 @@
    +
    diff --git a/src/main/webapp/file.xhtml b/src/main/webapp/file.xhtml index f69b5c35afd..835764d9cf5 100644 --- a/src/main/webapp/file.xhtml +++ b/src/main/webapp/file.xhtml @@ -43,7 +43,7 @@
    #{FilePage.fileMetadata.label} - +
    @@ -64,22 +64,23 @@

    - + - + + - + - + + value="#{bundle['file.DatasetVersion']} #{FilePage.fileMetadata.datasetVersion.versionNumber}.#{FilePage.fileMetadata.datasetVersion.minorVersionNumber}"/>
    @@ -98,9 +99,9 @@ - -
    @@ -112,19 +113,19 @@
  • @@ -145,11 +146,11 @@ - - -
    @@ -161,19 +162,19 @@
  • @@ -192,7 +193,7 @@
    -
    - +

    #{bundle['file.compute.fileAccessDenied']}

    @@ -690,7 +707,7 @@ - - - - - - - - - - - diff --git a/src/main/webapp/guestbook-terms-popup-fragment.xhtml b/src/main/webapp/guestbook-terms-popup-fragment.xhtml index 34df0c79390..5948047d845 100644 --- a/src/main/webapp/guestbook-terms-popup-fragment.xhtml +++ b/src/main/webapp/guestbook-terms-popup-fragment.xhtml @@ -274,8 +274,17 @@ + + + + diff --git a/src/main/webapp/metadataFragment.xhtml b/src/main/webapp/metadataFragment.xhtml index 200d2917b9a..43d54f64c43 100755 --- a/src/main/webapp/metadataFragment.xhtml +++ b/src/main/webapp/metadataFragment.xhtml @@ -88,8 +88,9 @@ escape="#{dsf.datasetFieldType.isEscapeOutputText()}"> + - + + - + @@ -123,7 +125,7 @@
    - + @@ -146,9 +148,11 @@ rendered="${cvocOnCvPart and cvPart.key.datasetFieldType.name.equals(cvocConf.get(cvPart.key.datasetFieldType.id).getString('term-uri-field'))}"> + - + + + - + + + + + + + + - +
    @@ -285,14 +302,14 @@
    + id="unique1" rendered="#{!dsf.datasetFieldType.allowMultiples}" filter="#{(dsf.datasetFieldType.controlledVocabularyValues.size() lt 10) ? 'false':'true'}" filterMatchMode="contains"> @@ -368,12 +385,12 @@ - + @@ -387,7 +404,7 @@ rendered="#{subdsf.datasetFieldType.allowMultiples}" label="#{bundle.select}" multiple="true" filter="#{(subdsf.datasetFieldType.controlledVocabularyValues.size() lt 10) ? 'false':'true'}" filterMatchMode="contains" showHeader="#{(subdsf.datasetFieldType.controlledVocabularyValues.size() lt 10) ? 'false':'true'}"> - +
    diff --git a/src/main/webapp/resources/css/structure.css b/src/main/webapp/resources/css/structure.css index 470c07d4534..64d07038f2f 100644 --- a/src/main/webapp/resources/css/structure.css +++ b/src/main/webapp/resources/css/structure.css @@ -483,7 +483,7 @@ span.search-term-match {font-weight: bold;} [id$='resultsTable'] div.card-title-icon-block span.label {vertical-align:15%} [id$='resultsTable'] div.card-preview-icon-block {width:48px; float:left; margin:4px 12px 6px 0;} [id$='resultsTable'] div.card-preview-icon-block a {display:block; height:48px; line-height:48px;} -[id$='resultsTable'] div.card-preview-icon-block img {vertical-align:middle;} +[id$='resultsTable'] div.card-preview-icon-block img {vertical-align:middle; max-width: 64px; max-height: 48px; padding-right: 10px;} [id$='resultsTable'] div.card-preview-icon-block span[class^='icon'], [id$='resultsTable'] div.card-preview-icon-block span[class^='glyphicon'] {font-size:2.8em;} @@ -1121,6 +1121,40 @@ padding-right:0px; font-weight: 700; } +/*RETENTION*/ +.retentionright { + margin-left:10px; +} +.retentioncheckbox { + margin-top: 20px; + padding-bottom: 30px; +} +.retentionmsg { + display:block; +} +.retention { + display:flex; + flex-direction:column; +} +.retention.text-warning, .retention.text-danger { + flex-direction:row; +} +.retention span { + margin-right: 10px; +} +.retention span.glyphicon-asterisk { + font-size: .5em; + vertical-align: top; +} + +.retention .col-lg-3 { + padding-left:0px; +} + +.retentionlabel { + font-weight: 700; +} + /*Accessibility*/ .text-muted { color: #707070; diff --git a/src/main/webapp/resources/js/fileupload.js b/src/main/webapp/resources/js/fileupload.js index 08d6956b62c..03ec82f214b 100644 --- a/src/main/webapp/resources/js/fileupload.js +++ b/src/main/webapp/resources/js/fileupload.js @@ -192,41 +192,45 @@ var fileUpload = class fileUploadClass { progBar.html(''); progBar.append($('').attr('class', 'ui-progressbar ui-widget ui-widget-content ui-corner-all')); if(this.urls.hasOwnProperty("url")) { - $.ajax({ - url: this.urls.url, - headers: { "x-amz-tagging": "dv-state=temp" }, - type: 'PUT', - data: this.file, - context:this, - cache: false, - processData: false, - success: function() { - //ToDo - cancelling abandons the file. It is marked as temp so can be cleaned up later, but would be good to remove now (requires either sending a presigned delete URL or adding a callback to delete only a temp file - if(!cancelled) { - this.reportUpload(); - } - }, - error: function(jqXHR, textStatus, errorThrown) { - console.log('Failure: ' + jqXHR.status); - console.log('Failure: ' + errorThrown); - uploadFailure(jqXHR, thisFile); - }, - xhr: function() { - var myXhr = $.ajaxSettings.xhr(); - if (myXhr.upload) { - myXhr.upload.addEventListener('progress', function(e) { - if (e.lengthComputable) { - var doublelength = 2 * e.total; - progBar.children('progress').attr({ - value: e.loaded, - max: doublelength - }); - } - }); + const url = this.urls.url; + const request = { + url: url, + type: 'PUT', + data: this.file, + context:this, + cache: false, + processData: false, + success: function() { + //ToDo - cancelling abandons the file. 
It is marked as temp so can be cleaned up later, but would be good to remove now (requires either sending a presigned delete URL or adding a callback to delete only a temp file + if(!cancelled) { + this.reportUpload(); + } + }, + error: function(jqXHR, textStatus, errorThrown) { + console.log('Failure: ' + jqXHR.status); + console.log('Failure: ' + errorThrown); + uploadFailure(jqXHR, thisFile); + }, + xhr: function() { + var myXhr = $.ajaxSettings.xhr(); + if (myXhr.upload) { + myXhr.upload.addEventListener('progress', function(e) { + if (e.lengthComputable) { + var doublelength = 2 * e.total; + progBar.children('progress').attr({ + value: e.loaded, + max: doublelength + }); + } + }); + } + return myXhr; } - return myXhr; + }; + if (url.includes("x-amz-tagging")) { + request.headers = { "x-amz-tagging": "dv-state=temp" }; } - }); + $.ajax(request); } else { var loaded=[]; this.etags=[]; diff --git a/src/main/webapp/roles-assign.xhtml b/src/main/webapp/roles-assign.xhtml index 4b31f10dbfc..93b9862c55d 100644 --- a/src/main/webapp/roles-assign.xhtml +++ b/src/main/webapp/roles-assign.xhtml @@ -31,7 +31,7 @@ styleClass="DropdownPopup" panelStyleClass="DropdownPopupPanel" var="roleAssignee" itemLabel="#{roleAssignee.displayInfo.title}" itemValue="#{roleAssignee}" converter="roleAssigneeConverter"> - + diff --git a/src/main/webapp/search-include-fragment.xhtml b/src/main/webapp/search-include-fragment.xhtml index 718df813348..505fe681363 100644 --- a/src/main/webapp/search-include-fragment.xhtml +++ b/src/main/webapp/search-include-fragment.xhtml @@ -36,7 +36,7 @@
    - + @@ -88,15 +88,26 @@
    + + + +
    +
    +
    + +
    +
    +
    +
    -
    +
    @@ -290,7 +312,7 @@

    - [+] #{bundle['dataverse.results.empty.link.technicalDetails']} + [+] #{bundle['dataverse.results.empty.link.technicalDetails']}

    @@ -361,12 +383,13 @@ - + @@ -399,7 +422,7 @@
    - + #{of:format1(bundle['alt.logo'], result.name)} @@ -508,7 +531,7 @@ - + @@ -556,12 +579,13 @@ +
    - + #{result.title} @@ -572,7 +596,7 @@ - + @@ -604,6 +628,7 @@ + @@ -624,7 +649,7 @@ - + @@ -632,11 +657,12 @@ +
    - + #{result.name} @@ -645,7 +671,7 @@ diff --git a/src/main/webapp/search-include-pager.xhtml b/src/main/webapp/search-include-pager.xhtml index ad8fb316dda..88714099c00 100644 --- a/src/main/webapp/search-include-pager.xhtml +++ b/src/main/webapp/search-include-pager.xhtml @@ -15,7 +15,7 @@
    • - + @@ -29,7 +29,7 @@
    • - + @@ -46,7 +46,7 @@ end="#{Math:min(SearchIncludeFragment.totalPages,SearchIncludeFragment.page+Math:max(2,5-SearchIncludeFragment.page))}" varStatus="pageStatus">
    • - + @@ -65,7 +65,7 @@
    • - + @@ -79,7 +79,7 @@
    • - + diff --git a/src/main/webapp/search/advanced.xhtml b/src/main/webapp/search/advanced.xhtml index 06e662064e7..24b78ba0dc9 100644 --- a/src/main/webapp/search/advanced.xhtml +++ b/src/main/webapp/search/advanced.xhtml @@ -121,6 +121,7 @@ + diff --git a/src/test/java/edu/harvard/iq/dataverse/DataCitationTest.java b/src/test/java/edu/harvard/iq/dataverse/DataCitationTest.java index 4097adb0be6..23a7efedca7 100644 --- a/src/test/java/edu/harvard/iq/dataverse/DataCitationTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/DataCitationTest.java @@ -378,6 +378,36 @@ public void testTitleWithQuotes() throws ParseException { } + @Test + public void testFileCitationToStringHtml() throws ParseException { + DatasetVersion dsv = createATestDatasetVersion("Dataset Title", true); + FileMetadata fileMetadata = new FileMetadata(); + fileMetadata.setLabel("foo.txt"); + fileMetadata.setDataFile(new DataFile()); + dsv.setVersionState(DatasetVersion.VersionState.RELEASED); + fileMetadata.setDatasetVersion(dsv); + dsv.setDataset(dsv.getDataset()); + DataCitation fileCitation = new DataCitation(fileMetadata, false); + assertEquals("First Last, 1955, \"Dataset Title\", https://doi.org/10.5072/FK2/LK0D1H, LibraScholar, V1; foo.txt [fileName]", fileCitation.toString(true)); + } + + @Test + public void testFileCitationToStringHtmlFilePid() throws ParseException { + DatasetVersion dsv = createATestDatasetVersion("Dataset Title", true); + FileMetadata fileMetadata = new FileMetadata(); + fileMetadata.setLabel("foo.txt"); + DataFile dataFile = new DataFile(); + dataFile.setProtocol("doi"); + dataFile.setAuthority("10.42"); + dataFile.setIdentifier("myFilePid"); + fileMetadata.setDataFile(dataFile); + dsv.setVersionState(DatasetVersion.VersionState.RELEASED); + fileMetadata.setDatasetVersion(dsv); + dsv.setDataset(dsv.getDataset()); + DataCitation fileCitation = new DataCitation(fileMetadata, true); + assertEquals("First Last, 1955, \"foo.txt\", Dataset Title, https://doi.org/10.42/myFilePid, LibraScholar, V1", fileCitation.toString(true)); + } + private DatasetVersion createATestDatasetVersion(String withTitle, boolean withAuthor) throws ParseException { Dataverse dataverse = new Dataverse(); @@ -400,6 +430,7 @@ private DatasetVersion createATestDatasetVersion(String withTitle, boolean withA fields.add(createTitleField(withTitle)); } if (withAuthor) { + // TODO: "Last, First" would make more sense. 
fields.add(createAuthorField("First Last")); } diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldServiceBeanTest.java new file mode 100644 index 00000000000..873d417131d --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldServiceBeanTest.java @@ -0,0 +1,179 @@ +package edu.harvard.iq.dataverse; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Collections; +import java.util.Set; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.AdditionalMatchers; +import org.mockito.Mockito; + +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import jakarta.json.Json; +import jakarta.json.JsonObject; + +public class DatasetFieldServiceBeanTest { + + private DatasetFieldServiceBean datasetFieldServiceBean; + + static String getCvocJson(String pathToJsonFile) throws IOException { + final File datasetVersionJson = new File(pathToJsonFile); + return new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); + } + + @BeforeEach + void setUp() { + this.datasetFieldServiceBean = Mockito.spy(new DatasetFieldServiceBean()); + } + + @AfterEach + void tearDown() { + this.datasetFieldServiceBean = null; + } + + @Test + void getIndexableStringsByTermUriSkosmos() throws IOException { + String fieldName = "keyword"; + String termURI = "http://aims.fao.org/aos/agrovoc/c_2389"; + + JsonObject cvocEntry = prepare(fieldName, "src/test/resources/json/cvoc-skosmos.json"); + + JsonObject getExtVocabValueReturnedValue = Json.createObjectBuilder() + .add("@id", termURI) + .add("termName", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("lang", "fr") + .add("value", "faux bourdon")) + .add(Json.createObjectBuilder() + .add("lang", "en") + .add("value", "drone (insects)"))) + .add("vocabularyUri", "http://aims.fao.org/aos/agrovoc") + .add("synonyms", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("lang", "fr") + .add("value", "Abeille mâle")) + .add(Json.createObjectBuilder() + .add("lang", "en") + .add("value", "drone honey bees"))) + .add("genericTerm", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("lang", "fr") + .add("value", "Colonie d'abeilles")) + .add(Json.createObjectBuilder() + .add("lang", "en") + .add("value", "bee colonies"))) + .build(); + Mockito.doReturn(getExtVocabValueReturnedValue).when(datasetFieldServiceBean).getExternalVocabularyValue(termURI); + Mockito.doReturn(null).when(datasetFieldServiceBean).getExternalVocabularyValue(AdditionalMatchers.not(Mockito.eq(termURI))); + + // keywordTermURL + Set result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, "keywordTermURL"); + assertEquals(Set.of("faux bourdon", "drone (insects)"), result); + + // keywordValue + result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, "keywordValue"); + assertEquals(Collections.emptySet(), result, "Only 'keywordTermURL' must return values for Skosmos"); + + // Any other field + result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, ""); + assertEquals(Collections.emptySet(), result, "Only 'keywordTermURL' must return values for Skosmos"); + + // Another termURI not in database + result = 
datasetFieldServiceBean.getIndexableStringsByTermUri("http://example.org/uuid", cvocEntry, "keywordTermURL"); + assertEquals(Collections.emptySet(), result); + } + + @Test + void getIndexableStringsByTermUriAgroportal() throws IOException { + String fieldName = "keyword"; + String termURI = "http://aims.fao.org/aos/agrovoc/c_50265"; + + JsonObject cvocEntry = prepare(fieldName, "src/test/resources/json/cvoc-agroportal.json"); + + JsonObject getExtVocabValueReturnedValue = Json.createObjectBuilder() + .add("@id", termURI) + .add("termName", Json.createObjectBuilder() + .add("fr", "association de quartier") + .add("en", "neighborhood associations")) + .add("vocabularyName", "https://data.agroportal.lirmm.fr/ontologies/AGROVOC") + .add("vocabularyUri", "https://data.agroportal.lirmm.fr/ontologies/AGROVOC") + .add("synonyms", Json.createObjectBuilder() + .add("en", Json.createArrayBuilder().add("neighborhood societies"))) + .build(); + Mockito.doReturn(getExtVocabValueReturnedValue).when(datasetFieldServiceBean).getExternalVocabularyValue(termURI); + Mockito.doReturn(null).when(datasetFieldServiceBean).getExternalVocabularyValue(AdditionalMatchers.not(Mockito.eq(termURI))); + + // keywordValue + Set result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, "keywordValue"); + assertEquals(Set.of("association de quartier", "neighborhood associations", "neighborhood societies"), result); + + // keywordTermURL + result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, "keywordTermURL"); + assertEquals(Collections.emptySet(), result, "Only 'keywordValue' must return values for Agroportal"); + + // Any other field + result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, ""); + assertEquals(Collections.emptySet(), result, "Only 'keywordValue' must return values for Agroportal"); + + // Another termURI not in database + result = datasetFieldServiceBean.getIndexableStringsByTermUri("http://example.org/uuid", cvocEntry, "keywordValue"); + assertEquals(Collections.emptySet(), result); + } + + @Test + void getIndexableStringsByTermUriOrcid() throws IOException { + String fieldName = "creator"; + String termURI = "https://orcid.org/0000-0003-4217-153X"; + + JsonObject cvocEntry = prepare(fieldName, "src/test/resources/json/cvoc-orcid.json"); + + JsonObject getExtVocabValueReturnedValue = Json.createObjectBuilder() + .add("@id", termURI) + .add("scheme", "ORCID") + .add("@type", "https://schema.org/Person") + .add("personName", "Doe, John") + .build(); + Mockito.doReturn(getExtVocabValueReturnedValue).when(datasetFieldServiceBean).getExternalVocabularyValue(termURI); + Mockito.doReturn(null).when(datasetFieldServiceBean).getExternalVocabularyValue(AdditionalMatchers.not(Mockito.eq(termURI))); + + // ORCID matches via the "personName" field in the "getIndexableStringsByTermUri" method + Set result = datasetFieldServiceBean.getIndexableStringsByTermUri(termURI, cvocEntry, ""); + assertEquals(Set.of("Doe, John"), result); + + // Another termURI not in database + result = datasetFieldServiceBean.getIndexableStringsByTermUri("http://example.org/uuid", cvocEntry, fieldName); + assertEquals(Collections.emptySet(), result); + } + + /** + * Prepare unit tests with mock methods. + * + * @param fieldName "field-name" in the cvoc configuration file + * @param jsonPath path of the JSON configuration file: src/test/resources/json/... 
+ * @return {@link JsonObject} representing the configuration file + * @throws IOException in case of a read error on the configuration file + */ + JsonObject prepare(String fieldName, String jsonPath) throws IOException { + Long dftId = Long.parseLong("1"); + // DatasetFieldType name corresponding to "field-name" in the cvoc configuration file + DatasetFieldType dft = new DatasetFieldType(fieldName, DatasetFieldType.FieldType.NONE, true); + dft.setId(dftId); + + Mockito.doReturn(dft).when(datasetFieldServiceBean).findByNameOpt(fieldName); + Mockito.doReturn(null).when(datasetFieldServiceBean).findByNameOpt(AdditionalMatchers.not(Mockito.eq(fieldName))); + + SettingsServiceBean settingsService = Mockito.mock(SettingsServiceBean.class); + Mockito.when(settingsService.getValueForKey(SettingsServiceBean.Key.CVocConf)).thenReturn(getCvocJson(jsonPath)); + datasetFieldServiceBean.settingsService = settingsService; + + return datasetFieldServiceBean.getCVocConf(false).get(dftId); + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java index f4af88818a5..3f85acc1f87 100644 --- a/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/DatasetFieldValueValidatorTest.java @@ -213,4 +213,19 @@ public void testInvalidEmail() { assertTrue(c.getMessage().contains("email")); }); } + @Test + public void testBoundingBoxValidity() { + // valid tests + assertTrue(DatasetFieldValueValidator.validateBoundingBox("-180", "180", "90", "-90")); + assertTrue(DatasetFieldValueValidator.validateBoundingBox("0", "0", "0", "0")); + + // invalid tests + assertTrue(!DatasetFieldValueValidator.validateBoundingBox("-180", null, "90", null)); + assertTrue(!DatasetFieldValueValidator.validateBoundingBox(null, "180", null, "90")); + assertTrue(!DatasetFieldValueValidator.validateBoundingBox("-180", "180", "90", "junk")); + assertTrue(!DatasetFieldValueValidator.validateBoundingBox("45", "40", "90", "0")); + assertTrue(!DatasetFieldValueValidator.validateBoundingBox("360", "0", "90", "-90")); + assertTrue(!DatasetFieldValueValidator.validateBoundingBox("", "", "", "")); + assertTrue(!DatasetFieldValueValidator.validateBoundingBox(null, null, null, null)); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/FileDownloadHelperTest.java b/src/test/java/edu/harvard/iq/dataverse/FileDownloadHelperTest.java index 19be411584c..e1e0f6d153a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/FileDownloadHelperTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/FileDownloadHelperTest.java @@ -215,7 +215,51 @@ void testCanDownloadFile_forUnrestrictedReleasedExpiredEmbargoFile() { assertTrue(fileDownloadHelper.canDownloadFile(fileMetadata)); } - + + @Test + void testCanNotDownloadFile_forExpiredRetentionFile() { + DataFile dataFile = new DataFile(); + dataFile.setId(2L); + + // With an expired retention end date, an unrestricted file should not be accessible + + Retention ret = new Retention(LocalDate.now().minusDays(1), "Retention period expired"); + dataFile.setRetention(ret); + + DatasetVersion datasetVersion = new DatasetVersion(); + datasetVersion.setVersionState(DatasetVersion.VersionState.RELEASED); + + FileMetadata fileMetadata = new FileMetadata(); + fileMetadata.setId(1L); + fileMetadata.setRestricted(false); + fileMetadata.setDataFile(dataFile); + fileMetadata.setDatasetVersion(datasetVersion); + + 
assertFalse(fileDownloadHelper.canDownloadFile(fileMetadata)); + } + + @Test + void testCanDownloadFile_forUnrestrictedReleasedNotExpiredRetentionFile() { + DataFile dataFile = new DataFile(); + dataFile.setId(2L); + + // With a retention end date in the future, an unrestricted file should be accessible + + Retention ret = new Retention(LocalDate.now(), "Retention period NOT expired"); + dataFile.setRetention(ret); + + DatasetVersion datasetVersion = new DatasetVersion(); + datasetVersion.setVersionState(DatasetVersion.VersionState.RELEASED); + + FileMetadata fileMetadata = new FileMetadata(); + fileMetadata.setId(1L); + fileMetadata.setRestricted(false); + fileMetadata.setDataFile(dataFile); + fileMetadata.setDatasetVersion(datasetVersion); + + assertTrue(fileDownloadHelper.canDownloadFile(fileMetadata)); + } + @ParameterizedTest @CsvSource({"false", "true"}) void testCanDownloadFile_forRestrictedReleasedFile(boolean hasPermission) { diff --git a/src/test/java/edu/harvard/iq/dataverse/GlobalIdTest.java b/src/test/java/edu/harvard/iq/dataverse/GlobalIdTest.java index 394f08c6e93..7065e9689e1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/GlobalIdTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/GlobalIdTest.java @@ -4,6 +4,8 @@ import org.junit.jupiter.api.Test; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -19,7 +21,7 @@ public class GlobalIdTest { @Test public void testValidDOI() { System.out.println("testValidDOI"); - GlobalId instance = new GlobalId(DOIServiceBean.DOI_PROTOCOL,"10.5072","FK2/BYM3IW", "/", DOIServiceBean.DOI_RESOLVER_URL, null); + GlobalId instance = new GlobalId(AbstractDOIProvider.DOI_PROTOCOL,"10.5072","FK2/BYM3IW", "/", AbstractDOIProvider.DOI_RESOLVER_URL, null); assertEquals("doi", instance.getProtocol()); assertEquals("10.5072", instance.getAuthority()); @@ -30,7 +32,7 @@ public void testValidDOI() { @Test public void testValidHandle() { System.out.println("testValidDOI"); - GlobalId instance = new GlobalId(HandlenetServiceBean.HDL_PROTOCOL, "1902.1","111012", "/", HandlenetServiceBean.HDL_RESOLVER_URL, null); + GlobalId instance = new GlobalId(HandlePidProvider.HDL_PROTOCOL, "1902.1","111012", "/", HandlePidProvider.HDL_RESOLVER_URL, null); assertEquals("hdl", instance.getProtocol()); assertEquals("1902.1", instance.getAuthority()); @@ -57,7 +59,7 @@ public void testInject() { System.out.println("testInject (weak test)"); // String badProtocol = "hdl:'Select value from datasetfieldvalue';/ha"; - GlobalId instance = PidUtil.parseAsGlobalID(HandlenetServiceBean.HDL_PROTOCOL, "'Select value from datasetfieldvalue';", "ha"); + GlobalId instance = PidUtil.parseAsGlobalID(HandlePidProvider.HDL_PROTOCOL, "'Select value from datasetfieldvalue';", "ha"); assertNull(instance); //exception.expect(IllegalArgumentException.class); diff --git a/src/test/java/edu/harvard/iq/dataverse/MailServiceBeanIT.java b/src/test/java/edu/harvard/iq/dataverse/MailServiceBeanIT.java new file mode 100644 index 00000000000..17dede5e9f3 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/MailServiceBeanIT.java @@ -0,0 +1,143 @@ +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import 
edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.MailSessionProducer; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; +import edu.harvard.iq.dataverse.util.testing.Tags; +import io.restassured.RestAssured; +import jakarta.mail.Session; +import jakarta.mail.internet.AddressException; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.wait.strategy.Wait; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; + +import java.util.List; + +import static io.restassured.RestAssured.given; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * An integration test using a fake SMTP MTA to check for outgoing mails. + * LIMITATION: This test cannot possibly check if the production and injection of the session via CDI + * works, as it is not running within a servlet container. This would require usage of Arquillian + * or an end-to-end API test with a deployed application. + */ + +@Tag(Tags.INTEGRATION_TEST) +@Tag(Tags.USES_TESTCONTAINERS) +@Testcontainers(disabledWithoutDocker = true) +@ExtendWith(MockitoExtension.class) +@LocalJvmSettings +@JvmSetting(key = JvmSettings.MAIL_MTA_SETTING, method = "tcSmtpHost", varArgs = "host") +@JvmSetting(key = JvmSettings.MAIL_MTA_SETTING, method = "tcSmtpPort", varArgs = "port") +class MailServiceBeanIT { + + private static final Integer PORT_SMTP = 1025; + private static final Integer PORT_HTTP = 1080; + + static MailServiceBean mailer; + static Session session; + static SettingsServiceBean settingsServiceBean = Mockito.mock(SettingsServiceBean.class); + static DataverseServiceBean dataverseServiceBean = Mockito.mock(DataverseServiceBean.class); + + @BeforeAll + static void setUp() { + // Set up mock behavior, inject as deps + Mockito.when(settingsServiceBean.getValueForKey(SettingsServiceBean.Key.SystemEmail)).thenReturn("noreply@example.org"); + BrandingUtil.injectServices(dataverseServiceBean, settingsServiceBean); + + // Must happen here, as we need Testcontainers to start the container first... + session = new MailSessionProducer().getSession(); + mailer = new MailServiceBean(session, settingsServiceBean); + } + + /* + Cannot use maildev/maildev here: although maildev does advertise SMTPUTF8 support and everything is fine over the wire, + both the JSON API and the Web UI of maildev have an encoding problem - UTF-8 mail addresses following RFC 6530/6531/6532 are botched. + MailCatcher doesn't provide official support for SMTPUTF8 either, but neither MailCatcher nor MailHog have maildev's encoding problem, + and the API of MailCatcher is much simpler to use during testing, which is why we are going with it. 
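+ For reference: SMTPUTF8 (RFC 6531) is the SMTP extension that allows UTF-8 in envelope addresses; when the MTA does not advertise it, such addresses must be rejected rather than mangled - see mailRejectedWhenUTF8AddressButNoSupport below.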
+ */ + @Container + static GenericContainer maildev = new GenericContainer<>("dockage/mailcatcher") + .withExposedPorts(PORT_HTTP, PORT_SMTP) + .waitingFor(Wait.forHttp("/")); + + static String tcSmtpHost() { + return maildev.getHost(); + } + + static String tcSmtpPort() { + return maildev.getMappedPort(PORT_SMTP).toString(); + } + + @BeforeAll + static void setup() { + RestAssured.baseURI = "http://" + tcSmtpHost(); + RestAssured.port = maildev.getMappedPort(PORT_HTTP); + } + + static List mailTo() { + return List.of( + "pete@mailinator.com", // one example using ASCII only, make sure it works + "michélle.pereboom@example.com", + "begüm.vriezen@example.com", + "lótus.gonçalves@example.com", + "lótus.gonçalves@éxample.com", + "begüm.vriezen@example.cologne", + "رونیکا.محمدخان@example.com", + "lótus.gonçalves@example.cóm" + ); + } + + @ParameterizedTest + @MethodSource("mailTo") + @JvmSetting(key = JvmSettings.MAIL_MTA_SUPPORT_UTF8, value = "true") + void sendEmailIncludingUTF8(String mailAddress) { + given().when().get("/messages") + .then() + .statusCode(200); + + // given + Session session = new MailSessionProducer().getSession(); + MailServiceBean mailer = new MailServiceBean(session, settingsServiceBean); + + // when + boolean sent = mailer.sendSystemEmail(mailAddress, "Test", "Test üüü", false); + + // then + assertTrue(sent); + //RestAssured.get("/messages").body().prettyPrint(); + given().when().get("/messages") + .then() + .statusCode(200) + .body("last().recipients.first()", equalTo("<" + mailAddress + ">")); + } + + @Test + @JvmSetting(key = JvmSettings.SYSTEM_EMAIL, value = "test@example.org") + @JvmSetting(key = JvmSettings.MAIL_MTA_SUPPORT_UTF8, value = "false") + void mailRejectedWhenUTF8AddressButNoSupport() throws AddressException { + // given + Session session = new MailSessionProducer().getSession(); + MailServiceBean mailer = new MailServiceBean(session, settingsServiceBean); + String to = "michélle.pereboom@example.com"; + + assertFalse(mailer.sendSystemEmail(to, "Test", "Test", false)); + } + +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/MailServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/MailServiceBeanTest.java index 32bf9702ee7..afcc12949d6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/MailServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/MailServiceBeanTest.java @@ -1,56 +1,151 @@ package edu.harvard.iq.dataverse; import edu.harvard.iq.dataverse.branding.BrandingUtilTest; -import edu.harvard.iq.dataverse.util.MailUtil; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; +import jakarta.mail.internet.AddressException; +import jakarta.mail.internet.InternetAddress; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; - -import jakarta.mail.internet.InternetAddress; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; import java.io.UnsupportedEncodingException; +import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key; +import static 
org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +@ExtendWith(MockitoExtension.class) class MailServiceBeanTest { - - /** - * We need to reset the BrandingUtil mocks for every test, as we rely on them being set to default. - */ - @BeforeEach - private void setup() { - BrandingUtilTest.setupMocks(); + @Nested + class Delegation { + /** + * We need to reset the BrandingUtil mocks for every test, as we rely on them being set to default. + */ + @BeforeEach + void setup() { + BrandingUtilTest.setupMocks(); + } + @AfterAll + static void tearDown() { + BrandingUtilTest.tearDownMocks(); + } + + @ParameterizedTest + @CsvSource(value = { + // with name in admin mail address + "Foo Bar , NULL, NULL, Foo Bar", + // without name, but installation branding name set + "dataverse@dataverse.org, NULL, LibraScholar Dataverse, LibraScholar Dataverse", + // without name, but root dataverse name available + "dataverse@dataverse.org, NotLibraScholar, NULL, NotLibraScholar", + // without name, without root dataverse name, without installation name -> default to bundle string. + "dataverse@dataverse.org, NULL, NULL, Dataverse Installation Admin" + }, nullValues = {"NULL"}) + void setContactDelegation(String fromMail, String rootDataverseName, String installationName, String expectedStartsWith) throws AddressException { + BrandingUtilTest.setRootDataverseName(rootDataverseName); + BrandingUtilTest.setInstallationName(installationName); + + InternetAddress fromAddress = new InternetAddress(fromMail); + MailServiceBean mailServiceBean = new MailServiceBean(); + try { + mailServiceBean.setContactDelegation("user@example.edu", fromAddress); + assertTrue(fromAddress.getPersonal().startsWith(expectedStartsWith)); + assertTrue(fromAddress.getPersonal().endsWith(" on behalf of user@example.edu")); + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + } + } } - @AfterAll - private static void tearDown() { - BrandingUtilTest.tearDownMocks(); + + @Nested + @LocalJvmSettings + class LookupMailAddresses { + + @Mock + SettingsServiceBean settingsServiceBean; + + @InjectMocks + MailServiceBean mailServiceBean = new MailServiceBean(); + + private static final String email = "test@example.org"; + + @Test + void lookupSystemWithoutAnySetting() { + assertTrue(mailServiceBean.getSystemAddress().isEmpty()); + } + + @Test + void lookupSystemWithDBOnly() { + Mockito.when(settingsServiceBean.getValueForKey(Key.SystemEmail)).thenReturn(email); + assertEquals(email, mailServiceBean.getSystemAddress().get().getAddress()); + } + + @Test + @JvmSetting(key = JvmSettings.SYSTEM_EMAIL, value = email) + void lookupSystemWithMPConfig() { + assertEquals(email, mailServiceBean.getSystemAddress().get().getAddress()); + } + + @Test + @JvmSetting(key = JvmSettings.SYSTEM_EMAIL, value = email) + void lookupSystemWhereMPConfigTakesPrecedenceOverDB() { + Mockito.lenient().when(settingsServiceBean.getValueForKey(Key.SystemEmail)).thenReturn("foobar@example.org"); + assertEquals(email, mailServiceBean.getSystemAddress().get().getAddress()); + } + + @Test + void lookupSupportWithoutAnySetting() { + assertTrue(mailServiceBean.getSupportAddress().isEmpty()); + } + + @Test + @JvmSetting(key = JvmSettings.SYSTEM_EMAIL, value = email) + void lookupSupportNotSetButWithSystemPresent() { + assertEquals(email, mailServiceBean.getSupportAddress().get().getAddress()); + } + + @Test + @JvmSetting(key = JvmSettings.SUPPORT_EMAIL, value = 
email) + void lookupSupportWithoutSystemSet() { + assertTrue(mailServiceBean.getSystemAddress().isEmpty()); + assertEquals(email, mailServiceBean.getSupportAddress().get().getAddress()); + } + + @Test + @JvmSetting(key = JvmSettings.SYSTEM_EMAIL, value = email) + @JvmSetting(key = JvmSettings.SUPPORT_EMAIL, value = "support@example.org") + void lookupSupportSetWithSystemPresent() { + assertEquals(email, mailServiceBean.getSystemAddress().get().getAddress()); + assertEquals("support@example.org", mailServiceBean.getSupportAddress().get().getAddress()); + } } - @ParameterizedTest - @CsvSource(value = { - // with name in admin mail address - "Foo Bar , NULL, NULL, Foo Bar", - // without name, but installation branding name set - "dataverse@dataverse.org, NULL, LibraScholar Dataverse, LibraScholar Dataverse", - // without name, but root dataverse name available - "dataverse@dataverse.org, NotLibraScholar, NULL, NotLibraScholar", - // without name, without root dataverse name, without installation name -> default to bundle string. - "dataverse@dataverse.org, NULL, NULL, Dataverse Installation Admin" - }, nullValues = {"NULL"}) - void setContactDelegation(String fromMail, String rootDataverseName, String installationName, String expectedStartsWith) { - BrandingUtilTest.setRootDataverseName(rootDataverseName); - BrandingUtilTest.setInstallationName(installationName); - - InternetAddress fromAddress = MailUtil.parseSystemAddress(fromMail); + @Nested + @LocalJvmSettings + class SendSystemMail { + @Mock + SettingsServiceBean settingsServiceBean; + @InjectMocks MailServiceBean mailServiceBean = new MailServiceBean(); - try { - mailServiceBean.setContactDelegation("user@example.edu", fromAddress); - assertTrue(fromAddress.getPersonal().startsWith(expectedStartsWith)); - assertTrue(fromAddress.getPersonal().endsWith(" on behalf of user@example.edu")); - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); + + @Test + @JvmSetting(key = JvmSettings.SYSTEM_EMAIL, value = "") + void skipIfNoSystemAddress() { + assertFalse(mailServiceBean.sendSystemEmail("target@example.org", "Test", "Test", false)); } } + } \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/PersistentIdentifierServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/PersistentIdentifierServiceBeanTest.java deleted file mode 100644 index 542d00d0d78..00000000000 --- a/src/test/java/edu/harvard/iq/dataverse/PersistentIdentifierServiceBeanTest.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
- */ -package edu.harvard.iq.dataverse; - -import edu.harvard.iq.dataverse.engine.TestCommandContext; -import edu.harvard.iq.dataverse.engine.command.CommandContext; -import edu.harvard.iq.dataverse.pidproviders.FakePidProviderServiceBean; -import edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.InjectMocks; -import org.mockito.Mock; -import org.mockito.Mockito; -import org.mockito.MockitoAnnotations; -import org.mockito.junit.jupiter.MockitoExtension; - - -import static org.junit.jupiter.api.Assertions.*; - -/** - * - * @author michael - */ -@ExtendWith(MockitoExtension.class) -public class PersistentIdentifierServiceBeanTest { - - @Mock - private SettingsServiceBean settingsServiceBean; - - @InjectMocks - DOIEZIdServiceBean ezidServiceBean = new DOIEZIdServiceBean(); - @InjectMocks - DOIDataCiteServiceBean dataCiteServiceBean = new DOIDataCiteServiceBean(); - @InjectMocks - FakePidProviderServiceBean fakePidProviderServiceBean = new FakePidProviderServiceBean(); - HandlenetServiceBean hdlServiceBean = new HandlenetServiceBean(); - PermaLinkPidProviderServiceBean permaLinkServiceBean = new PermaLinkPidProviderServiceBean(); - - CommandContext ctxt; - - @BeforeEach - public void setup() { - MockitoAnnotations.initMocks(this); - ctxt = new TestCommandContext(){ - @Override - public HandlenetServiceBean handleNet() { - return hdlServiceBean; - } - - @Override - public DOIDataCiteServiceBean doiDataCite() { - return dataCiteServiceBean; - } - - @Override - public DOIEZIdServiceBean doiEZId() { - return ezidServiceBean; - } - - @Override - public FakePidProviderServiceBean fakePidProvider() { - return fakePidProviderServiceBean; - } - - @Override - public PermaLinkPidProviderServiceBean permaLinkProvider() { - return permaLinkServiceBean; - } - - }; - } - - /** - * Test of getBean method, of class PersistentIdentifierServiceBean. - */ - @Test - public void testGetBean_String_CommandContext_OK() { - ctxt.settings().setValueForKey( SettingsServiceBean.Key.DoiProvider, "EZID"); - Mockito.when(settingsServiceBean.getValueForKey(SettingsServiceBean.Key.DoiProvider, "")).thenReturn("EZID"); - - assertEquals(ezidServiceBean, - GlobalIdServiceBean.getBean("doi", ctxt)); - - ctxt.settings().setValueForKey( SettingsServiceBean.Key.DoiProvider, "DataCite"); - Mockito.when(settingsServiceBean.getValueForKey(SettingsServiceBean.Key.DoiProvider, "")).thenReturn("DataCite"); - - assertEquals(dataCiteServiceBean, - GlobalIdServiceBean.getBean("doi", ctxt)); - - ctxt.settings().setValueForKey(SettingsServiceBean.Key.DoiProvider, "FAKE"); - Mockito.when(settingsServiceBean.getValueForKey(SettingsServiceBean.Key.DoiProvider, "")).thenReturn("FAKE"); - - assertEquals(fakePidProviderServiceBean, - GlobalIdServiceBean.getBean("doi", ctxt)); - - assertEquals(hdlServiceBean, - GlobalIdServiceBean.getBean("hdl", ctxt)); - - assertEquals(permaLinkServiceBean, - GlobalIdServiceBean.getBean("perma", ctxt)); - } - - @Test - public void testGetBean_String_CommandContext_BAD() { - ctxt.settings().setValueForKey( SettingsServiceBean.Key.DoiProvider, "non-existent-provider"); - assertNull(GlobalIdServiceBean.getBean("doi", ctxt)); - - - assertNull(GlobalIdServiceBean.getBean("non-existent-protocol", ctxt)); - } - - /** - * Test of getBean method, of class PersistentIdentifierServiceBean. 
- */ - @Test - public void testGetBean_CommandContext() { - ctxt.settings().setValueForKey( SettingsServiceBean.Key.Protocol, "doi"); - ctxt.settings().setValueForKey( SettingsServiceBean.Key.DoiProvider, "EZID"); - Mockito.when(settingsServiceBean.getValueForKey(SettingsServiceBean.Key.DoiProvider, "")).thenReturn("EZID"); - - assertEquals(ezidServiceBean, - GlobalIdServiceBean.getBean("doi", ctxt)); - - ctxt.settings().setValueForKey( SettingsServiceBean.Key.Protocol, "hdl"); - assertEquals(hdlServiceBean, - GlobalIdServiceBean.getBean("hdl", ctxt)); - - ctxt.settings().setValueForKey( SettingsServiceBean.Key.Protocol, "perma"); - assertEquals(permaLinkServiceBean, - GlobalIdServiceBean.getBean("perma", ctxt)); - } - - -} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java index 42e21e53101..d08f916243f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AccessIT.java @@ -198,6 +198,8 @@ public void testDownloadSingleFile() { //Not logged in non-restricted Response anonDownloadOriginal = UtilIT.downloadFileOriginal(tabFile1Id); Response anonDownloadConverted = UtilIT.downloadFile(tabFile1Id); + Response anonDownloadConvertedNullKey = UtilIT.downloadFile(tabFile1Id, null); + // ... and download the same tabular data file, but without the variable name header added: Response anonDownloadTabularNoHeader = UtilIT.downloadTabularFileNoVarHeader(tabFile1Id); // ... and download the same tabular file, this time requesting the "format=tab" explicitly: @@ -206,6 +208,8 @@ assertEquals(OK.getStatusCode(), anonDownloadConverted.getStatusCode()); assertEquals(OK.getStatusCode(), anonDownloadTabularNoHeader.getStatusCode()); assertEquals(OK.getStatusCode(), anonDownloadTabularWithFormatName.getStatusCode()); + assertEquals(UNAUTHORIZED.getStatusCode(), anonDownloadConvertedNullKey.getStatusCode()); + int origSizeAnon = anonDownloadOriginal.getBody().asByteArray().length; int convertSizeAnon = anonDownloadConverted.getBody().asByteArray().length; int tabularSizeNoVarHeader = anonDownloadTabularNoHeader.getBody().asByteArray().length; @@ -423,10 +427,7 @@ private HashMap readZipResponse(InputStream iStrea } String name = entry.getName(); -// String s = String.format("Entry: %s len %d added %TD", -// entry.getName(), entry.getSize(), -// new Date(entry.getTime())); -// System.out.println(s); + // Once we get the entry from the zStream, the zStream is // positioned ready to read the raw data, and we keep @@ -466,7 +467,7 @@ private HashMap readZipResponse(InputStream iStrea @Test public void testRequestAccess() throws InterruptedException { - + String pathToJsonFile = "scripts/api/data/dataset-create-new.json"; Response createDatasetResponse = UtilIT.createDatasetViaNativeApi(dataverseAlias, pathToJsonFile, apiToken); createDatasetResponse.prettyPrint(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 0c5de662e8a..6d7dd2eae29 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -15,21 +15,21 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; -import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; -import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; import org.junit.jupiter.api.Disabled; import 
org.junit.jupiter.api.Test; import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + + import java.util.Map; import java.util.UUID; +import java.util.logging.Level; import java.util.logging.Logger; -import static jakarta.ws.rs.core.Response.Status.CREATED; -import static jakarta.ws.rs.core.Response.Status.INTERNAL_SERVER_ERROR; -import static jakarta.ws.rs.core.Response.Status.OK; -import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED; +import static jakarta.ws.rs.core.Response.Status.*; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.notNullValue; @@ -818,45 +818,103 @@ public void testLoadMetadataBlock_ErrorHandling() { message ); } + @Test + public void testClearThumbnailFailureFlag(){ + Response nonExistentFile = UtilIT.clearThumbnailFailureFlag(Long.MAX_VALUE); + nonExistentFile.prettyPrint(); + nonExistentFile.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + + Response clearAllFlags = UtilIT.clearThumbnailFailureFlags(); + clearAllFlags.prettyPrint(); + clearAllFlags.then().assertThat().statusCode(OK.getStatusCode()); + } @Test public void testBannerMessages(){ - - String pathToJsonFile = "scripts/api/data/bannerMessageError.json"; - Response addBannerMessageErrorResponse = UtilIT.addBannerMessage(pathToJsonFile); + + //We check for existing banner messages and get the number of existing messages + Response getBannerMessageResponse = UtilIT.getBannerMessages(); + getBannerMessageResponse.prettyPrint(); + getBannerMessageResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + Integer numBannerMessages = + JsonPath.from(getBannerMessageResponse.getBody().asString()).getInt("data.size()"); + + //We add a banner message with an error in the json file + String pathToJsonFile = "scripts/api/data/bannerMessageError.json"; + Response addBannerMessageErrorResponse = UtilIT.addBannerMessage(pathToJsonFile); addBannerMessageErrorResponse.prettyPrint(); - String body = addBannerMessageErrorResponse.getBody().asString(); - String status = JsonPath.from(body).getString("status"); - assertEquals("ERROR", status); + addBannerMessageErrorResponse.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("status", equalTo("ERROR")); + //We add a banner message with a correct json file pathToJsonFile = "scripts/api/data/bannerMessageTest.json"; - Response addBannerMessageResponse = UtilIT.addBannerMessage(pathToJsonFile); addBannerMessageResponse.prettyPrint(); - body = addBannerMessageResponse.getBody().asString(); - status = JsonPath.from(body).getString("status"); - assertEquals("OK", status); + addBannerMessageResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("status", equalTo("OK")) + .body("data.message", equalTo("Banner Message added successfully.")); + Long addedBanner = Long.valueOf( + JsonPath.from(addBannerMessageResponse.getBody().asString()).getLong("data.id")); - Response getBannerMessageResponse = UtilIT.getBannerMessages(); + //We get the banner messages and check that the number of messages has increased by 1 + getBannerMessageResponse = UtilIT.getBannerMessages(); getBannerMessageResponse.prettyPrint(); - body = getBannerMessageResponse.getBody().asString(); - status = JsonPath.from(body).getString("status"); - assertEquals("OK", status); - String deleteId = 
UtilIT.getBannerMessageIdFromResponse(getBannerMessageResponse.getBody().asString()); - - System.out.print("delete id: " + deleteId); - - Response deleteBannerMessageResponse = UtilIT.deleteBannerMessage(new Long (deleteId)); + getBannerMessageResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.size()", equalTo(numBannerMessages + 1)); + + //We delete the banner message + Response deleteBannerMessageResponse = UtilIT.deleteBannerMessage(addedBanner); deleteBannerMessageResponse.prettyPrint(); - body = deleteBannerMessageResponse.getBody().asString(); - status = JsonPath.from(body).getString("status"); - assertEquals("OK", status); + deleteBannerMessageResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("status", equalTo("OK")); } + /** + * For a successful download from /tmp, see BagIT. Here we are doing error + * checking. + */ + @Test + public void testDownloadTmpFile() throws IOException { + + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response tryToDownloadAsNonSuperuser = UtilIT.downloadTmpFile("/tmp/foo", apiToken); + tryToDownloadAsNonSuperuser.then().assertThat().statusCode(FORBIDDEN.getStatusCode()); + + Response toggleSuperuser = UtilIT.makeSuperUser(username); + toggleSuperuser.then().assertThat() + .statusCode(OK.getStatusCode()); + + Response tryToDownloadEtcPasswd = UtilIT.downloadTmpFile("/etc/passwd", apiToken); + tryToDownloadEtcPasswd.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()) + .body("status", equalTo("ERROR")) + .body("message", equalTo("Path must begin with '/tmp' but after normalization was '/etc/passwd'.")); + } + private String createTestNonSuperuserApiToken() { Response createUserResponse = UtilIT.createRandomUser(); createUserResponse.then().assertThat().statusCode(OK.getStatusCode()); return UtilIT.getApiTokenFromResponse(createUserResponse); } + + @ParameterizedTest + @ValueSource(booleans={true,false}) + public void testSetSuperUserStatus(Boolean status) { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + Response toggleSuperuser = UtilIT.setSuperuserStatus(username, status); + toggleSuperuser.then().assertThat() + .statusCode(OK.getStatusCode()); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java index e7210bc45a9..c80e321b228 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/BagIT.java @@ -1,17 +1,32 @@ package edu.harvard.iq.dataverse.api; -import io.restassured.RestAssured; -import io.restassured.response.Response; import edu.harvard.iq.dataverse.engine.command.impl.LocalSubmitToArchiveCommand; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import io.restassured.RestAssured; +import static io.restassured.RestAssured.given; +import io.restassured.response.Response; import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.OK; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.util.Enumeration; +import java.util.Scanner; +import 
java.util.logging.Level; +import java.util.logging.Logger; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; import org.junit.jupiter.api.AfterAll; +import static org.junit.jupiter.api.Assertions.assertEquals; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; public class BagIT { + static String bagitExportDir = "/tmp"; + @BeforeAll public static void setUpClass() { @@ -25,21 +40,21 @@ public static void setUpClass() { setArchiverSettings.then().assertThat() .statusCode(OK.getStatusCode()); - Response setBagItLocalPath = UtilIT.setSetting(":BagItLocalPath", "/tmp"); + Response setBagItLocalPath = UtilIT.setSetting(":BagItLocalPath", bagitExportDir); setBagItLocalPath.then().assertThat() .statusCode(OK.getStatusCode()); } @Test - public void testBagItExport() { + public void testBagItExport() throws IOException { Response createUser = UtilIT.createRandomUser(); createUser.then().assertThat().statusCode(OK.getStatusCode()); String username = UtilIT.getUsernameFromResponse(createUser); String apiToken = UtilIT.getApiTokenFromResponse(createUser); - Response toggleSuperuser = UtilIT.makeSuperUser(username); - toggleSuperuser.then().assertThat() + Response makeSuperuser = UtilIT.setSuperuserStatus(username, true); + makeSuperuser.then().assertThat() .statusCode(OK.getStatusCode()); Response createDataverse = UtilIT.createRandomDataverse(apiToken); @@ -63,6 +78,78 @@ public void testBagItExport() { archiveDataset.prettyPrint(); archiveDataset.then().assertThat().statusCode(OK.getStatusCode()); + // spaceName comes from LocalSubmitToArchiveCommand + String spaceName = datasetPid.replace(':', '-').replace('/', '-') + .replace('.', '-').toLowerCase(); + // spacename: doi-10-5072-fk2-fosg5q + + String pathToZip = bagitExportDir + "/" + spaceName + "v1.0" + ".zip"; + + try { + // give the bag time to generate + Thread.sleep(3000); + } catch (InterruptedException ex) { + } + + // A bag could look like this: + //doi-10-5072-FK2-DKUTDUv-1-0/data/ + //doi-10-5072-FK2-DKUTDUv-1-0/data/Darwin's Finches/ + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/ + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/pid-mapping.txt + //doi-10-5072-FK2-DKUTDUv-1-0/manifest-md5.txt + //doi-10-5072-FK2-DKUTDUv-1-0/bagit.txt + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/oai-ore.jsonld + //doi-10-5072-FK2-DKUTDUv-1-0/metadata/datacite.xml + //doi-10-5072-FK2-DKUTDUv-1-0/bag-info.txt + // --- + // bag-info.txt could look like this: + //Contact-Name: Finch, Fiona + //Contact-Email: finch@mailinator.com + //Source-Organization: Dataverse Installation () + //Organization-Address: + //Organization-Email: + //External-Description: Darwin's finches (also known as the Galápagos finches) are a group of about + // fifteen species of passerine birds. 
+ //Bagging-Date: 2023-11-14 + //External-Identifier: https://doi.org/10.5072/FK2/LZIGBC + //Bag-Size: 0 bytes + //Payload-Oxum: 0.0 + //Internal-Sender-Identifier: Root:Darwin's Finches + Response downloadBag = UtilIT.downloadTmpFile(pathToZip, apiToken); + downloadBag.then().assertThat().statusCode(OK.getStatusCode()); + Path outputPath = Paths.get("/tmp/foo.zip"); + java.nio.file.Files.copy(downloadBag.getBody().asInputStream(), outputPath, StandardCopyOption.REPLACE_EXISTING); + + ZipFile zipFile = new ZipFile(outputPath.toString()); + Enumeration entries = zipFile.entries(); + String sourceOrg = null; + String orgAddress = null; + String orgEmail = null; + while (entries.hasMoreElements()) { + ZipEntry entry = entries.nextElement(); + String name = entry.getName(); + System.out.println("name: " + name); + if (name.endsWith("bag-info.txt")) { + InputStream stream = zipFile.getInputStream(entry); + Scanner s = new Scanner(stream).useDelimiter("\\A"); + String result = s.hasNext() ? s.next() : ""; + System.out.println("result: " + result); + String[] lines = result.split("\n"); + for (String line : lines) { + if (line.startsWith("Source-Organization")) { + sourceOrg = line; + } else if (line.startsWith("Organization-Address")) { + orgAddress = line; + } else if (line.startsWith("Organization-Email")) { + orgEmail = line; + } else { + } + } + } + } + assertEquals("Source-Organization: Dataverse Installation ()", sourceOrg.trim()); + assertEquals("Organization-Address: ", orgAddress.trim()); + assertEquals("Organization-Email: ", orgEmail.trim()); } @AfterAll @@ -75,4 +162,4 @@ public static void tearDownClass() { } -} +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java index facb3f7c784..3cd03abeb38 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataRetrieverApiIT.java @@ -3,10 +3,14 @@ import io.restassured.RestAssured; import io.restassured.response.Response; import edu.harvard.iq.dataverse.api.auth.ApiKeyAuthMechanism; +import edu.harvard.iq.dataverse.util.BundleUtil; + import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import static jakarta.ws.rs.core.Response.Status.OK; import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED; @@ -15,6 +19,8 @@ public class DataRetrieverApiIT { + private static final String ERR_MSG_FORMAT = "{\n \"success\": false,\n \"error_message\": \"%s\"\n}"; + @BeforeAll public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); @@ -35,14 +41,24 @@ public void testRetrieveMyDataAsJsonString() { String badUserIdentifier = "bad-identifier"; Response invalidUserIdentifierResponse = UtilIT.retrieveMyDataAsJsonString(superUserApiToken, badUserIdentifier, emptyRoleIdsList); - assertEquals("{\"success\":false,\"error_message\":\"No user found for: \\\"" + badUserIdentifier + "\\\"\"}", invalidUserIdentifierResponse.prettyPrint()); + assertEquals(prettyPrintError("dataretrieverAPI.user.not.found", Arrays.asList(badUserIdentifier)), invalidUserIdentifierResponse.prettyPrint()); assertEquals(OK.getStatusCode(), invalidUserIdentifierResponse.getStatusCode()); // Call as superuser with valid user identifier Response createSecondUserResponse = UtilIT.createRandomUser(); String userIdentifier = 
UtilIT.getUsernameFromResponse(createSecondUserResponse); Response validUserIdentifierResponse = UtilIT.retrieveMyDataAsJsonString(superUserApiToken, userIdentifier, emptyRoleIdsList); - assertEquals("{\"success\":false,\"error_message\":\"Sorry, you have no assigned roles.\"}", validUserIdentifierResponse.prettyPrint()); + assertEquals(prettyPrintError("myDataFinder.error.result.no.role", null), validUserIdentifierResponse.prettyPrint()); assertEquals(OK.getStatusCode(), validUserIdentifierResponse.getStatusCode()); } + + private static String prettyPrintError(String resourceBundleKey, List params) { + final String errorMessage; + if (params == null || params.isEmpty()) { + errorMessage = BundleUtil.getStringFromBundle(resourceBundleKey); + } else { + errorMessage = BundleUtil.getStringFromBundle(resourceBundleKey, params); + } + return String.format(ERR_MSG_FORMAT, errorMessage.replaceAll("\"", "\\\\\"")); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 56bf53c1c99..cb9481d3491 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -1,108 +1,73 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersionFilesServiceBean; import edu.harvard.iq.dataverse.FileSearchCriteria; -import io.restassured.RestAssured; - -import static edu.harvard.iq.dataverse.DatasetVersion.ARCHIVE_NOTE_MAX_LENGTH; -import static edu.harvard.iq.dataverse.api.ApiConstants.*; -import static io.restassured.RestAssured.given; - -import io.restassured.path.json.JsonPath; -import io.restassured.http.ContentType; -import io.restassured.response.Response; - -import java.time.LocalDate; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.logging.Logger; - -import org.apache.commons.lang3.RandomStringUtils; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; -import org.skyscreamer.jsonassert.JSONAssert; -import org.junit.jupiter.api.Disabled; - -import jakarta.json.JsonObject; - -import static jakarta.ws.rs.core.Response.Status.CREATED; -import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; -import static jakarta.ws.rs.core.Response.Status.OK; -import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED; -import static jakarta.ws.rs.core.Response.Status.NOT_FOUND; -import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; -import static jakarta.ws.rs.core.Response.Status.METHOD_NOT_ALLOWED; -import static jakarta.ws.rs.core.Response.Status.CONFLICT; -import static jakarta.ws.rs.core.Response.Status.NO_CONTENT; - -import edu.harvard.iq.dataverse.DataFile; - -import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; - import edu.harvard.iq.dataverse.authorization.DataverseRole; -import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; -import edu.harvard.iq.dataverse.settings.SettingsServiceBean; - -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.exception.ExceptionUtils; - -import io.restassured.parsing.Parser; - -import static io.restassured.path.json.JsonPath.with; - -import io.restassured.path.xml.XmlPath; - -import static edu.harvard.iq.dataverse.api.UtilIT.equalToCI; - import edu.harvard.iq.dataverse.authorization.groups.impl.builtin.AuthenticatedUsers; +import 
edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.GlobusOverlayAccessIOTest; import edu.harvard.iq.dataverse.datavariable.VarGroup; import edu.harvard.iq.dataverse.datavariable.VariableMetadata; import edu.harvard.iq.dataverse.datavariable.VariableMetadataDDIParser; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JSONLDUtil; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.StringReader; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.Files; - +import edu.harvard.iq.dataverse.util.json.JsonUtil; +import io.restassured.RestAssured; +import io.restassured.http.ContentType; +import io.restassured.parsing.Parser; +import io.restassured.path.json.JsonPath; +import io.restassured.path.xml.XmlPath; +import io.restassured.response.Response; import jakarta.json.Json; import jakarta.json.JsonArray; +import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; import jakarta.ws.rs.core.Response.Status; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.exception.ExceptionUtils; +import org.hamcrest.CoreMatchers; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.skyscreamer.jsonassert.JSONAssert; + import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.*; +import java.util.logging.Logger; +import static edu.harvard.iq.dataverse.DatasetVersion.ARCHIVE_NOTE_MAX_LENGTH; +import static edu.harvard.iq.dataverse.api.ApiConstants.*; +import static edu.harvard.iq.dataverse.api.UtilIT.API_TOKEN_HTTP_HEADER; +import static edu.harvard.iq.dataverse.api.UtilIT.equalToCI; +import static io.restassured.RestAssured.given; +import static io.restassured.path.json.JsonPath.with; +import static jakarta.ws.rs.core.Response.Status.*; import static java.lang.Thread.sleep; -import static org.junit.jupiter.api.Assertions.assertEquals; - -import org.hamcrest.CoreMatchers; - -import static org.hamcrest.CoreMatchers.containsString; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.CoreMatchers.hasItems; -import static org.hamcrest.CoreMatchers.startsWith; -import static org.hamcrest.CoreMatchers.nullValue; +import static org.hamcrest.CoreMatchers.*; import static org.hamcrest.Matchers.contains; - -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - +import static org.junit.jupiter.api.Assertions.*; public class DatasetsIT { private static final Logger logger = Logger.getLogger(DatasetsIT.class.getCanonicalName()); - - @BeforeAll public static void setUpClass() { @@ -135,6 +100,7 @@ public static 
void setUpClass() { */ } + @AfterAll public static void afterClass() { @@ -162,6 +128,59 @@ public static void afterClass() { .statusCode(200); */ } + + @Test + public void testCollectionSchema(){ + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response getCollectionSchemaResponse = UtilIT.getCollectionSchema(dataverseAlias, apiToken); + getCollectionSchemaResponse.prettyPrint(); + getCollectionSchemaResponse.then().assertThat() + .statusCode(200); + + JsonObject expectedSchema = null; + try { + expectedSchema = JsonUtil.getJsonObjectFromFile("doc/sphinx-guides/source/_static/api/dataset-schema.json"); + } catch (IOException ex) { + } + + assertEquals(JsonUtil.prettyPrint(expectedSchema), JsonUtil.prettyPrint(getCollectionSchemaResponse.body().asString())); + + String expectedJson = UtilIT.getDatasetJson("scripts/search/tests/data/dataset-finch1.json"); + + Response validateDatasetJsonResponse = UtilIT.validateDatasetJson(dataverseAlias, expectedJson, apiToken); + validateDatasetJsonResponse.prettyPrint(); + validateDatasetJsonResponse.then().assertThat() + .statusCode(200); + + + String pathToJsonFile = "scripts/search/tests/data/datasetMissingReqFields.json"; + + String jsonIn = UtilIT.getDatasetJson(pathToJsonFile); + + Response validateBadDatasetJsonResponse = UtilIT.validateDatasetJson(dataverseAlias, jsonIn, apiToken); + validateBadDatasetJsonResponse.prettyPrint(); + validateBadDatasetJsonResponse.then().assertThat() + .statusCode(200); + + + validateBadDatasetJsonResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(containsString("failed validation")); + + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); + deleteDataverseResponse.prettyPrint(); + assertEquals(200, deleteDataverseResponse.getStatusCode()); + + } @Test public void testCreateDataset() { @@ -583,6 +602,7 @@ public void testCreatePublishDestroyDataset() { */ @Test public void testDatasetVersionsAPI() { + // Create user String apiToken = UtilIT.createRandomUserGetToken(); @@ -609,26 +629,68 @@ public void testDatasetVersionsAPI() { Response unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, ":draft", apiToken); unpublishedDraft.prettyPrint(); unpublishedDraft.then().assertThat() - .body("data.files.size()", equalTo(1)) .statusCode(OK.getStatusCode()); // Now check that the file is NOT shown, when we ask the versions api to // skip files: - boolean skipFiles = true; - unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_DRAFT, apiToken, skipFiles, false); + boolean excludeFiles = true; + unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_DRAFT, apiToken, excludeFiles, false); unpublishedDraft.prettyPrint(); unpublishedDraft.then().assertThat() - .body("data.files", equalTo(null)) - .statusCode(OK.getStatusCode()); + .statusCode(OK.getStatusCode()) + .body("data.files", equalTo(null)); + + unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_DRAFT, apiTokenNoPerms, excludeFiles, false); + unpublishedDraft.prettyPrint(); + unpublishedDraft.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()); + + excludeFiles = false; + unpublishedDraft = 
+        excludeFiles = false;
+        unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_DRAFT, apiToken, excludeFiles, false);
+        unpublishedDraft.prettyPrint();
+        unpublishedDraft.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data.files.size()", equalTo(1));
+
+        unpublishedDraft = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_DRAFT, apiTokenNoPerms, excludeFiles, false);
+        unpublishedDraft.prettyPrint();
+        unpublishedDraft.then().assertThat()
+                .statusCode(UNAUTHORIZED.getStatusCode());
+
         // Publish collection and dataset
         UtilIT.publishDataverseViaNativeApi(collectionAlias, apiToken).then().assertThat().statusCode(OK.getStatusCode());
         UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken).then().assertThat().statusCode(OK.getStatusCode());
 
+        // Set of tests on a non-deaccessioned dataset
+        String specificVersion = "1.0";
+        boolean includeDeaccessioned = false;
+        Response datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("RELEASED"))
+                .body("data.latestVersionPublishingState", equalTo("RELEASED"));
+
         // Upload another file:
         String pathToFile2 = "src/main/webapp/resources/images/cc0.png";
         Response uploadResponse2 = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile2, apiToken);
+        uploadResponse2.prettyPrint();
         uploadResponse2.then().assertThat().statusCode(OK.getStatusCode());
+
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DRAFT"))
+                .body("data.latestVersionPublishingState", equalTo("DRAFT"));
+
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("RELEASED"))
+                .body("data.latestVersionPublishingState", equalTo("DRAFT"));
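+
+        // In short: an authorized token resolves :latest to the new DRAFT, while a token
+        // without permissions still resolves it to the RELEASED version.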
 
         // We should now have a published version, and a draft.
@@ -642,7 +704,8 @@ public void testDatasetVersionsAPI() {
                 .body("data.size()", equalTo(2))
                 .body("data[0].files.size()", equalTo(2))
                 .body("data[1].files.size()", equalTo(1));
-
+
+
         // Now call this api with the new (as of 6.1) pagination parameters
         Integer offset = 0;
         Integer howmany = 1;
@@ -652,23 +715,296 @@ public void testDatasetVersionsAPI() {
         versionsResponse.then().assertThat()
                 .statusCode(OK.getStatusCode())
                 .body("data.size()", equalTo(1))
+                .body("data.versionState[0]", equalTo("DRAFT"))
                 .body("data[0].files.size()", equalTo(2));
 
         // And now call it with an un-privileged token, to make sure only one
-        // (the published) version is shown:
-
+        // (the published) version is shown:
         versionsResponse = UtilIT.getDatasetVersions(datasetPid, apiTokenNoPerms);
         versionsResponse.prettyPrint();
         versionsResponse.then().assertThat()
                 .statusCode(OK.getStatusCode())
+                .body("data.versionState[0]", not("DRAFT"))
                 .body("data.size()", equalTo(1));
 
         // And now call the "short", no-files version of the same api
-        versionsResponse = UtilIT.getDatasetVersions(datasetPid, apiTokenNoPerms, skipFiles);
+        excludeFiles = true;
+        versionsResponse = UtilIT.getDatasetVersions(datasetPid, apiTokenNoPerms, excludeFiles);
         versionsResponse.prettyPrint();
         versionsResponse.then().assertThat()
                 .statusCode(OK.getStatusCode())
                 .body("data[0].files", equalTo(null));
+
+        excludeFiles = true;
+        // Latest published, authorized token:
+        // latest published requested, draft exists and user has access to the draft
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("RELEASED"))
+                .body("data.files", equalTo(null));
+
+        // Latest published, unauthorized token:
+        // latest published requested, draft exists but user doesn't have access to the draft
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("RELEASED"))
+                .body("data.files", equalTo(null));
+
+        // Latest, authorized token:
+        // latest requested, draft exists and user has access to the draft
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DRAFT"))
+                .body("data.files", equalTo(null));
+
+        // Latest, unauthorized token:
+        // latest requested, draft exists but user doesn't have access to the draft
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("RELEASED"))
+                .body("data.files", equalTo(null));
+
+        // Specific version, authorized token:
+        // specific version requested, draft exists and user has access to the draft
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("RELEASED"))
+                .body("data.files", equalTo(null));
+
+        // Specific version, unauthorized token:
+        // specific version requested, draft exists but user doesn't have access to the draft
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("RELEASED"))
+                .body("data.files", equalTo(null));
+
+        excludeFiles = false;
+
+        // Latest published, authorized token:
+        // latest published requested, draft exists and user has access to the draft
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("RELEASED"))
+                .body("data.files.size()", equalTo(1));
+
+        // Latest published, unauthorized token:
+        // latest published requested, draft exists but user doesn't have access to the draft
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("RELEASED"))
+                .body("data.files.size()", equalTo(1));
+
+        // Latest, authorized token: the user is authenticated, so they should get the DRAFT version
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DRAFT"))
+                .body("data.files.size()", equalTo(2));
+
+        // Latest, unauthorized token: the user has no permissions, so they should get the latest published version
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("RELEASED"))
+                .body("data.files.size()", equalTo(1));
+
+        // Specific version, authorized token:
+        // specific version requested, draft exists and user has access to the draft
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("RELEASED"))
+                .body("data.files.size()", equalTo(1));
+
+        // Specific version, unauthorized token:
+        // specific version requested, draft exists but user doesn't have access to the draft
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("RELEASED"))
+                .body("data.files.size()", equalTo(1));
+
+        // We deaccession the dataset
+        Response deaccessionDatasetResponse = UtilIT.deaccessionDataset(datasetId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken);
+        deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        // Set of tests on the deaccessioned dataset, only 3/9 should return an OK message
+        includeDeaccessioned = true;
+        excludeFiles = false;
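+        // The grid below covers {latest-published, latest, specific "1.0"} crossed with
+        // {authorized, no-permission} tokens, first including files, then excluding them.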
+
+        // Latest published, authorized token, with the deaccessioned dataset
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DEACCESSIONED"))
+                .body("data.files.size()", equalTo(1));
+
+        // Latest published, requesting files: one version is DEACCESSIONED, the second is DRAFT, so we shouldn't get any datasets
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        // Latest, authorized token: should get the DRAFT version
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DRAFT"))
+                .body("data.files.size()", equalTo(2));
+
+        // Latest, unauthorized token, requesting files: one version is DEACCESSIONED, the second is DRAFT, so we shouldn't get any datasets
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        // Specific version, authorized token
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DEACCESSIONED"))
+                .body("data.files.size()", equalTo(1));
+
+        // Specific version, unauthorized token, requesting files: the version is DEACCESSIONED and the user can't see the DRAFT, so we shouldn't get any datasets
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        excludeFiles = true;
+
+        // Latest published, excluding files, authorized token, with the deaccessioned dataset:
+        // that version was deaccessioned, but a draft exists and the user has access to it
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DEACCESSIONED"))
+                .body("data.files", equalTo(null));
+
+        // Latest published, excluding files: should get the DEACCESSIONED version;
+        // that version was deaccessioned, a draft exists but the user doesn't have access to it
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DEACCESSIONED"))
+                .body("data.files", equalTo(null));
+
+        // Latest, authorized token: should get the DRAFT version with no files;
+        // latest requested, there is a draft and the user has access to it
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DRAFT"))
+                .body("data.files", equalTo(null));
+
+        // Latest, unauthorized token, excluding files: the latest version is deaccessioned and
+        // the user doesn't have access to the draft, so the DEACCESSIONED version is returned
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DEACCESSIONED"))
+                .body("data.files", equalTo(null));
+
+        // Specific version, authorized token:
+        // specific version requested (deaccessioned), the latest version is a draft and the user has access to it
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DEACCESSIONED"))
+                .body("data.files", equalTo(null));
+
+        // Specific version, unauthorized token:
+        // specific version requested (deaccessioned), the latest version is a draft but the user doesn't have access to it
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DEACCESSIONED"))
+                .body("data.files", equalTo(null));
+
+        // Set of tests where we have a deaccessioned dataset but don't include deaccessioned versions
+        includeDeaccessioned = false;
+        excludeFiles = false;
+
+        // Latest published, authorized token, with the deaccessioned dataset not included
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        // Latest published, unauthorized token, with the deaccessioned dataset not included
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
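+
+        // From here on, only the DRAFT remains reachable, and only for the authorized token: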
+        // Latest, authorized token: should get the DRAFT version, since the user has access to the draft
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DRAFT"))
+                .body("data.files.size()", equalTo(2));
+
+        // Latest, unauthorized token: one version is DEACCESSIONED, the second is DRAFT, so we shouldn't get any datasets
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        // Specific version, authorized token: the version is DEACCESSIONED, so we shouldn't get any datasets
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        // Specific version, unauthorized token: the version is DEACCESSIONED, so we shouldn't get any datasets
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        excludeFiles = true;
+
+        // Latest published, authorized token, with the deaccessioned dataset not included
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        // Latest published, unauthorized token, with the deaccessioned dataset not included
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST_PUBLISHED, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        // Latest, authorized token: should get the DRAFT version
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(OK.getStatusCode())
+                .body("data.versionState", equalTo("DRAFT"))
+                .body("data.files", equalTo(null));
+
+        // Latest, unauthorized token: one version is DEACCESSIONED, the second is DRAFT, so we shouldn't get any datasets
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, DS_VERSION_LATEST, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        // Specific version, authorized token: the version is DEACCESSIONED, so we shouldn't get any datasets
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiToken, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        // Specific version, unauthorized token: the version is DEACCESSIONED, so we shouldn't get any datasets
+        datasetVersion = UtilIT.getDatasetVersion(datasetPid, specificVersion, apiTokenNoPerms, excludeFiles, includeDeaccessioned);
+        datasetVersion.prettyPrint();
+        datasetVersion.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
     }
 
@@ -923,6 +1259,10 @@ public void testExcludeEmail() {
 
     }
 
+    @Disabled
+    /* The identifier generation style is no longer a global, dynamically changeable setting.
+     * To make this test work after PR #10234, it will require configuring a PidProvider that
+     * uses this style and creating a collection/dataset that uses that provider.
+     */
     @Test
     public void testStoredProcGeneratedAsIdentifierGenerationStyle() {
         // Please note that this test only works if the stored procedure
@@ -1376,6 +1716,29 @@ public void testAddRoles(){
         giveRandoPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "fileDownloader", "@" + randomUsername, apiToken);
         giveRandoPermission.prettyPrint();
         assertEquals(200, giveRandoPermission.getStatusCode());
+
+        // Assert that assigning a role the user already holds is covered
+        validateAssignExistingRole(datasetPersistentId, randomUsername, apiToken, "fileDownloader");
+
+        // Create another random user to become curator:
+        Response createCuratorUser = UtilIT.createRandomUser();
+        createCuratorUser.prettyPrint();
+        String curatorUsername = UtilIT.getUsernameFromResponse(createCuratorUser);
+        String curatorUserApiToken = UtilIT.getApiTokenFromResponse(createCuratorUser);
+
+        Response giveCuratorPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "curator", "@" + curatorUsername, apiToken);
+        giveCuratorPermission.prettyPrint();
+        assertEquals(200, giveCuratorPermission.getStatusCode());
+
+        // Test if privilege escalation is possible: a curator should not be able to assign admin rights
+        Response giveTooMuchPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "admin", "@" + curatorUsername, curatorUserApiToken);
+        giveTooMuchPermission.prettyPrint();
+        assertEquals(401, giveTooMuchPermission.getStatusCode());
+
+        giveTooMuchPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "admin", "@" + randomUsername, curatorUserApiToken);
+        giveTooMuchPermission.prettyPrint();
+        assertEquals(401, giveTooMuchPermission.getStatusCode());
 
         String idToDelete = JsonPath.from(giveRandoPermission.getBody().asString()).getString("data.id");
@@ -1398,7 +1761,7 @@ public void testAddRoles(){
         deleteGrantedAccess.prettyPrint();
         assertEquals(200, deleteGrantedAccess.getStatusCode());
-        Response deleteDatasetResponse = UtilIT.deleteDatasetViaNativeApi(datasetId, apiToken);
+        Response deleteDatasetResponse = UtilIT.deleteDatasetViaNativeApi(datasetId, apiToken);
         deleteDatasetResponse.prettyPrint();
         assertEquals(200, deleteDatasetResponse.getStatusCode());
@@ -1409,9 +1772,98 @@ public void testAddRoles(){
         Response deleteUserResponse = UtilIT.deleteUser(username);
         deleteUserResponse.prettyPrint();
         assertEquals(200, deleteUserResponse.getStatusCode());
+
+        deleteUserResponse = UtilIT.deleteUser(randomUsername);
+        deleteUserResponse.prettyPrint();
+        assertEquals(200, deleteUserResponse.getStatusCode());
+
+        deleteUserResponse = UtilIT.deleteUser(curatorUsername);
+        deleteUserResponse.prettyPrint();
+        assertEquals(200, deleteUserResponse.getStatusCode());
     }
 
+    @Test
+    public void testListRoleAssignments() {
+        Response createAdminUser = UtilIT.createRandomUser();
+        String adminUsername = UtilIT.getUsernameFromResponse(createAdminUser);
+        String adminApiToken = UtilIT.getApiTokenFromResponse(createAdminUser);
+        UtilIT.makeSuperUser(adminUsername);
+
+        Response createDataverseResponse = UtilIT.createRandomDataverse(adminApiToken);
+        createDataverseResponse.prettyPrint();
+        String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
+
+        // Now, let's allow anyone with a Dataverse account (any "random user")
+        // to create datasets in this dataverse:
+        Response grantRole = UtilIT.grantRoleOnDataverse(dataverseAlias, DataverseRole.DS_CONTRIBUTOR, AuthenticatedUsers.get().getIdentifier(), adminApiToken);
+        grantRole.prettyPrint();
+        assertEquals(OK.getStatusCode(), grantRole.getStatusCode());
+
+        Response createContributorUser = UtilIT.createRandomUser();
+        String contributorUsername = UtilIT.getUsernameFromResponse(createContributorUser);
+        String contributorApiToken = UtilIT.getApiTokenFromResponse(createContributorUser);
+
+        // First, we test listing role assignments on a dataverse, which requires "ManageDataversePermissions"
+        Response notPermittedToListRoleAssignmentOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, contributorApiToken);
+        assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataverse.getStatusCode());
+
+        Response roleAssignmentsOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, adminApiToken);
+        roleAssignmentsOnDataverse.prettyPrint();
+        assertEquals(OK.getStatusCode(), roleAssignmentsOnDataverse.getStatusCode());
+
+        // Second, we test listing role assignments on a dataset, which requires "ManageDatasetPermissions"
+        Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, contributorApiToken);
+        createDatasetResponse.prettyPrint();
+        Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id");
+        logger.info("dataset id: " + datasetId);
+
+        Response datasetAsJson = UtilIT.nativeGet(datasetId, adminApiToken);
+        datasetAsJson.then().assertThat()
+                .statusCode(OK.getStatusCode());
+
+        String identifier = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.identifier");
+        assertEquals(10, identifier.length());
+
+        String protocol1 = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.protocol");
+        String authority1 = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.authority");
+        String identifier1 = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.identifier");
+        String datasetPersistentId = protocol1 + ":" + authority1 + "/" + identifier1;
+
+        Response notPermittedToListRoleAssignmentOnDataset = UtilIT.getRoleAssignmentsOnDataset(datasetId.toString(), null, contributorApiToken);
+        assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataset.getStatusCode());
+
+        // We assign the curator role to the contributor user
+        // (it includes "ManageDatasetPermissions", which is required for listing role
+        // assignments of a dataset, but not "ManageDataversePermissions")
+        Response giveRandoPermission = UtilIT.grantRoleOnDataset(datasetPersistentId, "curator", "@" + contributorUsername, adminApiToken);
+        giveRandoPermission.prettyPrint();
+        assertEquals(200, giveRandoPermission.getStatusCode());
+
+        // The contributor user should now be able to list dataset role assignments as well...
+        Response roleAssignmentsOnDataset = UtilIT.getRoleAssignmentsOnDataset(datasetId.toString(), null, contributorApiToken);
+        roleAssignmentsOnDataset.prettyPrint();
+        assertEquals(OK.getStatusCode(), roleAssignmentsOnDataset.getStatusCode());
+
+        // ...but not dataverse role assignments
+        notPermittedToListRoleAssignmentOnDataverse = UtilIT.getRoleAssignmentsOnDataverse(dataverseAlias, contributorApiToken);
+        assertEquals(UNAUTHORIZED.getStatusCode(), notPermittedToListRoleAssignmentOnDataverse.getStatusCode());
+    }
+
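+    // Helper for testAddRoles: granting a role the assignee already holds must fail
+    // with FORBIDDEN rather than creating a duplicate assignment.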
role, "@" + randomUsername, apiToken); + failedGrantPermission.prettyPrint(); + failedGrantPermission.then().assertThat() + .body("message", containsString("User already has this role for this dataset")) + .statusCode(FORBIDDEN.getStatusCode()); + } + @Test public void testFileChecksum() { @@ -1520,6 +1972,43 @@ public void testDeleteDatasetWhileFileIngesting() { .statusCode(FORBIDDEN.getStatusCode()); } + + @Test + public void testGetDatasetOwners() { + + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat() + .statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.prettyPrint(); + createDatasetResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + String persistentId = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId"); + logger.info("Dataset created with id " + datasetId + " and persistent id " + persistentId); + + Response getDatasetWithOwners = UtilIT.getDatasetWithOwners(persistentId, apiToken, true); + getDatasetWithOwners.prettyPrint(); + getDatasetWithOwners.then().assertThat().body("data.isPartOf.identifier", equalTo(dataverseAlias)); + + Response destroyDatasetResponse = UtilIT.destroyDataset(datasetId, apiToken); + assertEquals(200, destroyDatasetResponse.getStatusCode()); + + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); + assertEquals(200, deleteDataverseResponse.getStatusCode()); + + Response deleteUserResponse = UtilIT.deleteUser(username); + assertEquals(200, deleteUserResponse.getStatusCode()); + } /** * In order for this test to pass you must have the Data Capture Module ( @@ -2646,6 +3135,46 @@ public void testSemanticMetadataAPIs() { response = UtilIT.updateDatasetJsonLDMetadata(datasetId, apiToken, badTerms, false); response.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + + //We publish the dataset and dataverse + UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken).then().assertThat().statusCode(OK.getStatusCode()); + UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken).then().assertThat().statusCode(OK.getStatusCode()); + + //We check the version is published + response = UtilIT.getDatasetJsonLDMetadata(datasetId, apiToken); + response.prettyPrint(); + jsonLDString = getData(response.getBody().asString()); + jsonLDObject = JSONLDUtil.decontextualizeJsonLD(jsonLDString); + String publishedVersion = jsonLDObject.getString("http://schema.org/version"); + assertNotEquals("DRAFT", publishedVersion); + + // Upload a file so a draft version is created + String pathToFile = "src/main/webapp/resources/images/cc0.png"; + Response uploadResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); + uploadResponse.prettyPrint(); + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + int fileID = uploadResponse.jsonPath().getInt("data.files[0].dataFile.id"); + + //We check the 
+
+        // We check that the authenticated user gets DRAFT
+        response = UtilIT.getDatasetJsonLDMetadata(datasetId, apiToken);
+        response.prettyPrint();
+        jsonLDString = getData(response.getBody().asString());
+        jsonLDObject = JSONLDUtil.decontextualizeJsonLD(jsonLDString);
+        assertEquals("DRAFT", jsonLDObject.getString("http://schema.org/version"));
+
+        // Create a user with no permissions and check that they get the published version
+        String apiTokenNoPerms = UtilIT.createRandomUserGetToken();
+        response = UtilIT.getDatasetJsonLDMetadata(datasetId, apiTokenNoPerms);
+        response.prettyPrint();
+        jsonLDString = getData(response.getBody().asString());
+        jsonLDObject = JSONLDUtil.decontextualizeJsonLD(jsonLDString);
+        assertNotEquals("DRAFT", jsonLDObject.getString("http://schema.org/version"));
+
+        // Delete the file
+        Response deleteFileResponse = UtilIT.deleteFileInDataset(fileID, apiToken);
+        deleteFileResponse.prettyPrint();
+        deleteFileResponse.then().assertThat().statusCode(OK.getStatusCode());
+
         // Delete the terms of use
         response = UtilIT.deleteDatasetJsonLDMetadata(datasetId, apiToken, "{\"https://dataverse.org/schema/core#termsOfUse\": \"New terms\"}");
@@ -2659,15 +3188,27 @@ public void testSemanticMetadataAPIs() {
         jsonLDObject = JSONLDUtil.decontextualizeJsonLD(jsonLDString);
         assertTrue(!jsonLDObject.containsKey("https://dataverse.org/schema/core#termsOfUse"));
 
-        // Cleanup - delete dataset, dataverse, user...
-        Response deleteDatasetResponse = UtilIT.deleteDatasetViaNativeApi(datasetId, apiToken);
-        deleteDatasetResponse.prettyPrint();
-        assertEquals(200, deleteDatasetResponse.getStatusCode());
+        // Delete the DRAFT dataset version
+        Response deleteDraftResponse = UtilIT.deleteDatasetVersionViaNativeApi(datasetId, DS_VERSION_DRAFT, apiToken);
+        deleteDraftResponse.prettyPrint();
+        deleteDraftResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        // We make the user a superuser so we can delete the published dataset
+        Response superUserResponse = UtilIT.makeSuperUser(username);
+        superUserResponse.prettyPrint();
+        superUserResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        // Delete the published dataset
+        Response deletePublishedResponse = UtilIT.deleteDatasetViaNativeApi(datasetId, apiToken);
+        deletePublishedResponse.prettyPrint();
+        deletePublishedResponse.then().assertThat().statusCode(OK.getStatusCode());
 
+        // Delete the dataverse
         Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken);
         deleteDataverseResponse.prettyPrint();
         assertEquals(200, deleteDataverseResponse.getStatusCode());
 
+        // Delete the user
         Response deleteUserResponse = UtilIT.deleteUser(username);
         deleteUserResponse.prettyPrint();
         assertEquals(200, deleteUserResponse.getStatusCode());
@@ -3233,7 +3774,8 @@ public void testArchivalStatusAPI() throws IOException {
 
         //Verify the status is empty
         Response nullStatus = UtilIT.getDatasetVersionArchivalStatus(datasetId, "1.0", apiToken);
-        nullStatus.then().assertThat().statusCode(NO_CONTENT.getStatusCode());
+        nullStatus.prettyPrint();
+        nullStatus.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
 
         //Set it
         Response setStatus = UtilIT.setDatasetVersionArchivalStatus(datasetId, "1.0", apiToken, "pending",
@@ -3251,7 +3793,7 @@ public void testArchivalStatusAPI() throws IOException {
 
         //Make sure it's gone
         Response nullStatus2 = UtilIT.getDatasetVersionArchivalStatus(datasetId, "1.0", apiToken);
-        nullStatus2.then().assertThat().statusCode(NO_CONTENT.getStatusCode());
+        nullStatus2.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
 
     }
@@ -3368,18 +3910,52 @@ public void getDatasetVersionCitation() {
         String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
 
         Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
+        createDatasetResponse.prettyPrint();
         createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode());
         int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id");
 
-        Response getDatasetVersionCitationResponse = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_DRAFT, apiToken);
+        Response getDatasetVersionCitationResponse = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_DRAFT, false, apiToken);
         getDatasetVersionCitationResponse.prettyPrint();
         getDatasetVersionCitationResponse.then().assertThat()
                 .statusCode(OK.getStatusCode())
                 // We check that the returned message contains information expected for the citation string
                 .body("data.message", containsString("DRAFT VERSION"));
+
+        // Test Deaccessioned
+        Response publishDataverseResponse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken);
+        publishDataverseResponse.then().assertThat().statusCode(OK.getStatusCode());
+        Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken);
+        publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        Response deaccessionDatasetResponse = UtilIT.deaccessionDataset(datasetId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken);
+        deaccessionDatasetResponse.prettyPrint();
+        deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode())
+                .assertThat().body("data.message", containsString(String.valueOf(datasetId)));
+
+        // includeDeaccessioned false
+        Response getDatasetVersionCitationNotDeaccessioned = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_LATEST_PUBLISHED, false, apiToken);
+        getDatasetVersionCitationNotDeaccessioned.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        // includeDeaccessioned true
+        Response getDatasetVersionCitationDeaccessioned = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_LATEST_PUBLISHED, true, apiToken);
+        getDatasetVersionCitationDeaccessioned.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data.message", containsString("DEACCESSIONED VERSION"));
+
+        publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken);
+        publishDatasetResponse.prettyPrint();
+        publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        String persistentId = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId");
+
+        deaccessionDatasetResponse = UtilIT.deaccessionDataset(persistentId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken);
+        deaccessionDatasetResponse.prettyPrint();
+        deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode())
+                .assertThat().body("data.message", containsString(String.valueOf(persistentId)));
     }
 
+
     @Test
     public void getVersionFiles() throws IOException, InterruptedException {
         Response createUser = UtilIT.createRandomUser();
@@ -3418,7 +3994,8 @@ public void getVersionFiles() throws IOException, InterruptedException {
         getVersionFilesResponsePaginated.then().assertThat()
                 .statusCode(OK.getStatusCode())
                 .body("data[0].label", equalTo(testFileName1))
-                .body("data[1].label", equalTo(testFileName2));
+                .body("data[1].label", equalTo(testFileName2))
+                .body("totalCount", equalTo(5));
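+        // totalCount reports all files in the version, not just the page returned.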
 
         int fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size();
         assertEquals(testPageSize, fileMetadatasCount);
@@ -3432,7 +4009,8 @@ public void getVersionFiles() throws IOException, InterruptedException {
         getVersionFilesResponsePaginated.then().assertThat()
                 .statusCode(OK.getStatusCode())
                 .body("data[0].label", equalTo(testFileName3))
-                .body("data[1].label", equalTo(testFileName4));
+                .body("data[1].label", equalTo(testFileName4))
+                .body("totalCount", equalTo(5));
 
         fileMetadatasCount = getVersionFilesResponsePaginated.jsonPath().getList("data").size();
         assertEquals(testPageSize, fileMetadatasCount);
@@ -3584,6 +4162,21 @@ public void getVersionFiles() throws IOException, InterruptedException {
 
         fileMetadatasCount = getVersionFilesResponseEmbargoedThenRestricted.jsonPath().getList("data").size();
         assertEquals(1, fileMetadatasCount);
+
+        // Test Access Status Retention
+        UtilIT.setSetting(SettingsServiceBean.Key.MinRetentionDurationInMonths, "-1");
+        String retentionEndDate = LocalDate.now().plusMonths(240).format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
+
+        // Create a retention period for test file 2 (Retention and Public)
+        Response createFileRetentionResponse = UtilIT.createFileRetention(datasetId, Integer.parseInt(testFileId2), retentionEndDate, apiToken);
+        createFileRetentionResponse.then().assertThat()
+                .statusCode(OK.getStatusCode());
+
+        Response getVersionFilesResponseRetentionPeriodExpired = UtilIT.getVersionFiles(datasetId, DS_VERSION_LATEST, null, null, null, FileSearchCriteria.FileAccessStatus.RetentionPeriodExpired.toString(), null, null, null, null, false, apiToken);
+        getVersionFilesResponseRetentionPeriodExpired.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("totalCount", equalTo(0));
 
         // Test Access Status Public
         Response getVersionFilesResponsePublic = UtilIT.getVersionFiles(datasetId, DS_VERSION_LATEST, null, null, null, FileSearchCriteria.FileAccessStatus.Public.toString(), null, null, null, null, false, apiToken);
@@ -4102,4 +4695,166 @@ public void testGetUserPermissionsOnDataset() {
         Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getUserPermissionsOnDataset("testInvalidId", apiToken);
         getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode());
     }
+
+    // Requires that a Globus remote store be set up with the parameters in the GlobusOverlayAccessIOTest class.
+    // Tests whether the API call succeeds and has some of the expected parameters.
+    @Test
+    @Disabled
+    public void testGetGlobusUploadParameters() {
+        // Creates managed and remote Globus stores
+        GlobusOverlayAccessIOTest.setUp();
+
+        Response createUser = UtilIT.createRandomUser();
+        createUser.then().assertThat().statusCode(OK.getStatusCode());
+        String apiToken = UtilIT.getApiTokenFromResponse(createUser);
+        String username = UtilIT.getUsernameFromResponse(createUser);
+
+        Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
+        createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode());
+        String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
+
+        Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
+        createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode());
+        int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id");
+
+        Response makeSuperUser = UtilIT.makeSuperUser(username);
+        assertEquals(200, makeSuperUser.getStatusCode());
+
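+        // Point the dataset at the remote ("globusr") store created in setUp() above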
+        Response setDriver = UtilIT.setDatasetStorageDriver(datasetId, System.getProperty("dataverse.files.globusr.label"), apiToken);
+        assertEquals(200, setDriver.getStatusCode());
+
+        Response getUploadParams = UtilIT.getDatasetGlobusUploadParameters(datasetId, "en_us", apiToken);
+        assertEquals(200, getUploadParams.getStatusCode());
+        JsonObject data = JsonUtil.getJsonObject(getUploadParams.getBody().asString());
+        JsonObject queryParams = data.getJsonObject("queryParameters");
+        assertEquals("en_us", queryParams.getString("dvLocale"));
+        assertEquals("false", queryParams.getString("managed"));
+        // Assumes only one reference endpoint with a basepath is configured
+        assertTrue(queryParams.getJsonArray("referenceEndpointsWithPaths").get(0).toString().indexOf(System.getProperty("dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS)) > -1);
+        JsonArray signedUrls = data.getJsonArray("signedUrls");
+        boolean found = false;
+        for (int i = 0; i < signedUrls.size(); i++) {
+            JsonObject signedUrl = signedUrls.getJsonObject(i);
+            if (signedUrl.getString("name").equals("requestGlobusReferencePaths")) {
+                found = true;
+                break;
+            }
+        }
+        assertTrue(found);
+        // Removes managed and remote Globus stores
+        GlobusOverlayAccessIOTest.tearDown();
+    }
+
+    @Test
+    public void testGetCanDownloadAtLeastOneFile() {
+        Response createUserResponse = UtilIT.createRandomUser();
+        createUserResponse.then().assertThat().statusCode(OK.getStatusCode());
+        String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse);
+
+        Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
+        createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode());
+        String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
+
+        Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
+        createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode());
+        int datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id");
+        String datasetPersistentId = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId");
+
+        // Upload file
+        String pathToTestFile = "src/test/resources/images/coffeeshop.png";
+        Response uploadResponse = UtilIT.uploadFileViaNative(Integer.toString(datasetId), pathToTestFile, Json.createObjectBuilder().build(), apiToken);
+        uploadResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        String fileId = JsonPath.from(uploadResponse.body().asString()).getString("data.files[0].dataFile.id");
+
+        // Publish dataset version
+        Response publishDataverseResponse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken);
+        publishDataverseResponse.then().assertThat().statusCode(OK.getStatusCode());
+        Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", apiToken);
+        publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        // Create a second user to call the getCanDownloadAtLeastOneFile method
+        Response createSecondUserResponse = UtilIT.createRandomUser();
+        createSecondUserResponse.then().assertThat().statusCode(OK.getStatusCode());
+        String secondUserApiToken = UtilIT.getApiTokenFromResponse(createSecondUserResponse);
+        String secondUserUsername = UtilIT.getUsernameFromResponse(createSecondUserResponse);
+
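+        // The calls below walk one permission cascade: a public file, then a restricted
+        // file without access, with direct file access, with the fileDownloader role on
+        // the dataset, and finally with the fileDownloader role on the collection.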
+        // Call when a file is released
+        Response canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken);
+        canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode());
+        boolean canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data");
+        assertTrue(canDownloadAtLeastOneFile);
+
+        // Restrict file
+        Response restrictFileResponse = UtilIT.restrictFile(fileId, true, apiToken);
+        restrictFileResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        // Publish dataset version
+        publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetPersistentId, "major", apiToken);
+        publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        // Call when a file is restricted and the user does not have access
+        canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken);
+        canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode());
+        canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data");
+        assertFalse(canDownloadAtLeastOneFile);
+
+        // Grant restricted file access to the user
+        Response grantFileAccessResponse = UtilIT.grantFileAccess(fileId, "@" + secondUserUsername, apiToken);
+        grantFileAccessResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        // Call when a file is restricted and the user has access
+        canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, secondUserApiToken);
+        canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode());
+        canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data");
+        assertTrue(canDownloadAtLeastOneFile);
+
+        // Create a third user to call the getCanDownloadAtLeastOneFile method
+        Response createThirdUserResponse = UtilIT.createRandomUser();
+        createThirdUserResponse.then().assertThat().statusCode(OK.getStatusCode());
+        String thirdUserApiToken = UtilIT.getApiTokenFromResponse(createThirdUserResponse);
+        String thirdUserUsername = UtilIT.getUsernameFromResponse(createThirdUserResponse);
+
+        // Call when a file is restricted and the user does not have access
+        canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, thirdUserApiToken);
+        canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode());
+        canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data");
+        assertFalse(canDownloadAtLeastOneFile);
+
+        // Grant fileDownloader role on the dataset to the user
+        Response grantDatasetFileDownloaderRoleOnDatasetResponse = UtilIT.grantRoleOnDataset(datasetPersistentId, "fileDownloader", "@" + thirdUserUsername, apiToken);
+        grantDatasetFileDownloaderRoleOnDatasetResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        // Call when a file is restricted and the user has the fileDownloader role on the dataset
+        canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, thirdUserApiToken);
+        canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode());
+        canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data");
+        assertTrue(canDownloadAtLeastOneFile);
+
+        // Create a fourth user to call the getCanDownloadAtLeastOneFile method
+        Response createFourthUserResponse = UtilIT.createRandomUser();
+        createFourthUserResponse.then().assertThat().statusCode(OK.getStatusCode());
+        String fourthUserApiToken = UtilIT.getApiTokenFromResponse(createFourthUserResponse);
+        String fourthUserUsername = UtilIT.getUsernameFromResponse(createFourthUserResponse);
+
+        // Call when a file is restricted and the user does not have access
+        canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, fourthUserApiToken);
+        canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode());
+        canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data");
+        assertFalse(canDownloadAtLeastOneFile);
+
+        // Grant fileDownloader role on the collection to the user
+        Response grantDatasetFileDownloaderRoleOnCollectionResponse = UtilIT.grantRoleOnDataverse(dataverseAlias, "fileDownloader", "@" + fourthUserUsername, apiToken);
+        grantDatasetFileDownloaderRoleOnCollectionResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        // Call when a file is restricted and the user has the fileDownloader role on the collection
+        canDownloadAtLeastOneFileResponse = UtilIT.getCanDownloadAtLeastOneFile(Integer.toString(datasetId), DS_VERSION_LATEST, fourthUserApiToken);
+        canDownloadAtLeastOneFileResponse.then().assertThat().statusCode(OK.getStatusCode());
+        canDownloadAtLeastOneFile = JsonPath.from(canDownloadAtLeastOneFileResponse.body().asString()).getBoolean("data");
+        assertTrue(canDownloadAtLeastOneFile);
+
+        // Call with an invalid dataset id
+        Response getUserPermissionsOnDatasetInvalidIdResponse = UtilIT.getCanDownloadAtLeastOneFile("testInvalidId", DS_VERSION_LATEST, secondUserApiToken);
+        getUserPermissionsOnDatasetInvalidIdResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode());
+    }
 }
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java
index 78ece6ecc42..79cc46cfa79 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java
@@ -18,21 +18,18 @@
 import jakarta.json.JsonObject;
 import jakarta.json.JsonObjectBuilder;
 import jakarta.ws.rs.core.Response.Status;
-import static jakarta.ws.rs.core.Response.Status.OK;
-import static jakarta.ws.rs.core.Response.Status.CREATED;
-import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST;
-import static jakarta.ws.rs.core.Response.Status.FORBIDDEN;
-import static jakarta.ws.rs.core.Response.Status.NOT_FOUND;
-import static jakarta.ws.rs.core.Response.Status.INTERNAL_SERVER_ERROR;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
-import static org.hamcrest.CoreMatchers.equalTo;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertNull;
-import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import static jakarta.ws.rs.core.Response.Status.*;
+import static org.hamcrest.CoreMatchers.*;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.hasItemInArray;
+import static org.junit.jupiter.api.Assertions.*;
+
 import java.nio.file.Files;
 import io.restassured.path.json.JsonPath;
 import org.hamcrest.CoreMatchers;
@@ -145,6 +142,34 @@ public void testMinimalDataverse() throws FileNotFoundException {
         deleteDataverse.prettyPrint();
         deleteDataverse.then().assertThat().statusCode(OK.getStatusCode());
     }
+
+    @Test
+    public void testGetDataverseOwners() throws FileNotFoundException {
+        Response createUser = UtilIT.createRandomUser();
+        createUser.prettyPrint();
+        String username = UtilIT.getUsernameFromResponse(createUser);
+        String apiToken = UtilIT.getApiTokenFromResponse(createUser);
+
+        Response createDataverse1Response = UtilIT.createRandomDataverse(apiToken);
+        createDataverse1Response.prettyPrint();
+        createDataverse1Response.then().assertThat().statusCode(CREATED.getStatusCode());
+
+        String first = UtilIT.getAliasFromResponse(createDataverse1Response);
+
+        Response getWithOwnersFirst = UtilIT.getDataverseWithOwners(first, apiToken, true);
+        getWithOwnersFirst.prettyPrint();
+
+        Response createLevel1a = UtilIT.createSubDataverse(UtilIT.getRandomDvAlias() + "-level1a", null, apiToken, first);
+        createLevel1a.prettyPrint();
+        String level1a = UtilIT.getAliasFromResponse(createLevel1a);
+
+        Response getWithOwners = UtilIT.getDataverseWithOwners(level1a, apiToken, true);
+        getWithOwners.prettyPrint();
+
+        getWithOwners.then().assertThat().body("data.isPartOf.identifier", equalTo(first));
+    }
 
     /**
      * A regular user can create a Dataverse Collection and access its
@@ -388,12 +413,13 @@ public void testMoveDataverse() {
         Response superuserResponse = UtilIT.makeSuperUser(username);
 
         Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
-        createDataverseResponse.prettyPrint();
+        assertTrue(createDataverseResponse.prettyPrint().contains("isReleased\": false"));
         String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
         Integer dataverseId = UtilIT.getDataverseIdFromResponse(createDataverseResponse);
 
         Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken);//.publishDataverseViaSword(dataverseAlias, apiToken);
         assertEquals(200, publishDataverse.getStatusCode());
+        assertTrue(publishDataverse.prettyPrint().contains("isReleased\": true"));
 
         Response createDataverseResponse2 = UtilIT.createRandomDataverse(apiToken);
         createDataverseResponse2.prettyPrint();
@@ -412,7 +438,7 @@ public void testMoveDataverse() {
         while (checkIndex) {
             try {
                 try {
-                    Thread.sleep(4000);
+                    Thread.sleep(6000);
                 } catch (InterruptedException ex) {
                 }
                 Response search = UtilIT.search("id:dataverse_" + dataverseId + "&subtree=" + dataverseAlias2, apiToken);
@@ -663,5 +689,263 @@ public void testAttributesApi() throws Exception {
         deleteCollectionResponse.prettyPrint();
         assertEquals(OK.getStatusCode(), deleteCollectionResponse.getStatusCode());
     }
-
+
+    @Test
+    public void testListMetadataBlocks() {
+        Response createUserResponse = UtilIT.createRandomUser();
+        String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse);
+
+        Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
+        createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode());
+        String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
+
+        Response setMetadataBlocksResponse = UtilIT.setMetadataBlocks(dataverseAlias, Json.createArrayBuilder().add("citation").add("astrophysics"), apiToken);
+        setMetadataBlocksResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        String[] testInputLevelNames = {"geographicCoverage", "country", "city"};
+        boolean[] testRequiredInputLevels = {false, true, false};
+        boolean[] testIncludedInputLevels = {false, true, true};
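+        // Per the arrays above: geographicCoverage is excluded, country is required,
+        // and city is included but optional.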
+        Response updateDataverseInputLevelsResponse = UtilIT.updateDataverseInputLevels(dataverseAlias, testInputLevelNames, testRequiredInputLevels, testIncludedInputLevels, apiToken);
+        updateDataverseInputLevelsResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        // Dataverse not found
+        Response listMetadataBlocksResponse = UtilIT.listMetadataBlocks("-1", false, false, apiToken);
+        listMetadataBlocksResponse.then().assertThat().statusCode(NOT_FOUND.getStatusCode());
+
+        // Existent dataverse and no optional params
+        String[] expectedAllMetadataBlockDisplayNames = {"Astronomy and Astrophysics Metadata", "Citation Metadata", "Geospatial Metadata"};
+
+        listMetadataBlocksResponse = UtilIT.listMetadataBlocks(dataverseAlias, false, false, apiToken);
+        listMetadataBlocksResponse.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data[0].fields", equalTo(null))
+                .body("data[1].fields", equalTo(null))
+                .body("data[2].fields", equalTo(null))
+                .body("data.size()", equalTo(3));
+
+        String actualMetadataBlockDisplayName1 = listMetadataBlocksResponse.then().extract().path("data[0].displayName");
+        String actualMetadataBlockDisplayName2 = listMetadataBlocksResponse.then().extract().path("data[1].displayName");
+        String actualMetadataBlockDisplayName3 = listMetadataBlocksResponse.then().extract().path("data[2].displayName");
+        assertNotEquals(actualMetadataBlockDisplayName1, actualMetadataBlockDisplayName2);
+        assertNotEquals(actualMetadataBlockDisplayName1, actualMetadataBlockDisplayName3);
+        assertNotEquals(actualMetadataBlockDisplayName2, actualMetadataBlockDisplayName3);
+        assertThat(expectedAllMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName1));
+        assertThat(expectedAllMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName2));
+        assertThat(expectedAllMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName3));
+
+        // Existent dataverse and onlyDisplayedOnCreate=true
+        String[] expectedOnlyDisplayedOnCreateMetadataBlockDisplayNames = {"Citation Metadata", "Geospatial Metadata"};
+
+        listMetadataBlocksResponse = UtilIT.listMetadataBlocks(dataverseAlias, true, false, apiToken);
+        listMetadataBlocksResponse.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data[0].fields", equalTo(null))
+                .body("data[1].fields", equalTo(null))
+                .body("data.size()", equalTo(2));
+
+        actualMetadataBlockDisplayName1 = listMetadataBlocksResponse.then().extract().path("data[0].displayName");
+        actualMetadataBlockDisplayName2 = listMetadataBlocksResponse.then().extract().path("data[1].displayName");
+        assertNotEquals(actualMetadataBlockDisplayName1, actualMetadataBlockDisplayName2);
+        assertThat(expectedOnlyDisplayedOnCreateMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName1));
+        assertThat(expectedOnlyDisplayedOnCreateMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName2));
+
+        // Existent dataverse and returnDatasetFieldTypes=true
+        listMetadataBlocksResponse = UtilIT.listMetadataBlocks(dataverseAlias, false, true, apiToken);
+        listMetadataBlocksResponse.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data[0].fields", not(equalTo(null)))
+                .body("data[1].fields", not(equalTo(null)))
+                .body("data[2].fields", not(equalTo(null)))
+                .body("data.size()", equalTo(3));
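+
+        // Block order in the response is not guaranteed, so the display names are
+        // compared pairwise and against the expected set instead of by index.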
.body("data.size()", equalTo(3)); + + actualMetadataBlockDisplayName1 = listMetadataBlocksResponse.then().extract().path("data[0].displayName"); + actualMetadataBlockDisplayName2 = listMetadataBlocksResponse.then().extract().path("data[1].displayName"); + actualMetadataBlockDisplayName3 = listMetadataBlocksResponse.then().extract().path("data[2].displayName"); + assertNotEquals(actualMetadataBlockDisplayName1, actualMetadataBlockDisplayName2); + assertNotEquals(actualMetadataBlockDisplayName1, actualMetadataBlockDisplayName3); + assertNotEquals(actualMetadataBlockDisplayName2, actualMetadataBlockDisplayName3); + assertThat(expectedAllMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName1)); + assertThat(expectedAllMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName2)); + assertThat(expectedAllMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName3)); + + // Check dataset fields for the updated input levels are retrieved + int geospatialMetadataBlockIndex = actualMetadataBlockDisplayName1.equals("Geospatial Metadata") ? 0 : actualMetadataBlockDisplayName2.equals("Geospatial Metadata") ? 1 : 2; + + // Since the included property of geographicCoverage is set to false, we should retrieve the total number of fields minus one + listMetadataBlocksResponse.then().assertThat() + .body(String.format("data[%d].fields.size()", geospatialMetadataBlockIndex), equalTo(10)); + + String actualMetadataField1 = listMetadataBlocksResponse.then().extract().path(String.format("data[%d].fields.geographicCoverage.name", geospatialMetadataBlockIndex)); + String actualMetadataField2 = listMetadataBlocksResponse.then().extract().path(String.format("data[%d].fields.country.name", geospatialMetadataBlockIndex)); + String actualMetadataField3 = listMetadataBlocksResponse.then().extract().path(String.format("data[%d].fields.city.name", geospatialMetadataBlockIndex)); + + assertNull(actualMetadataField1); + assertNotNull(actualMetadataField2); + assertNotNull(actualMetadataField3); + + // Existent dataverse and onlyDisplayedOnCreate=true and returnDatasetFieldTypes=true + listMetadataBlocksResponse = UtilIT.listMetadataBlocks(dataverseAlias, true, true, apiToken); + listMetadataBlocksResponse.then().assertThat().statusCode(OK.getStatusCode()); + listMetadataBlocksResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].fields", not(equalTo(null))) + .body("data[1].fields", not(equalTo(null))) + .body("data.size()", equalTo(2)); + + actualMetadataBlockDisplayName1 = listMetadataBlocksResponse.then().extract().path("data[0].displayName"); + actualMetadataBlockDisplayName2 = listMetadataBlocksResponse.then().extract().path("data[1].displayName"); + assertNotEquals(actualMetadataBlockDisplayName1, actualMetadataBlockDisplayName2); + assertThat(expectedOnlyDisplayedOnCreateMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName1)); + assertThat(expectedOnlyDisplayedOnCreateMetadataBlockDisplayNames, hasItemInArray(actualMetadataBlockDisplayName2)); + + // Check dataset fields for the updated input levels are retrieved + geospatialMetadataBlockIndex = actualMetadataBlockDisplayName2.equals("Geospatial Metadata") ? 
1 : 0; + + listMetadataBlocksResponse.then().assertThat() + .body(String.format("data[%d].fields.size()", geospatialMetadataBlockIndex), equalTo(1)); + + actualMetadataField1 = listMetadataBlocksResponse.then().extract().path(String.format("data[%d].fields.geographicCoverage.name", geospatialMetadataBlockIndex)); + actualMetadataField2 = listMetadataBlocksResponse.then().extract().path(String.format("data[%d].fields.country.name", geospatialMetadataBlockIndex)); + actualMetadataField3 = listMetadataBlocksResponse.then().extract().path(String.format("data[%d].fields.city.name", geospatialMetadataBlockIndex)); + + assertNull(actualMetadataField1); + assertNotNull(actualMetadataField2); + assertNull(actualMetadataField3); + + // User has no permissions on the requested dataverse + Response createSecondUserResponse = UtilIT.createRandomUser(); + String secondApiToken = UtilIT.getApiTokenFromResponse(createSecondUserResponse); + + createDataverseResponse = UtilIT.createRandomDataverse(secondApiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String secondDataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + listMetadataBlocksResponse = UtilIT.listMetadataBlocks(secondDataverseAlias, true, true, apiToken); + listMetadataBlocksResponse.then().assertThat().statusCode(UNAUTHORIZED.getStatusCode()); + } + + @Test + public void testFeatureDataverse() throws Exception { + + Response createUser = UtilIT.createRandomUser(); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); + assertEquals(200, publishDataverse.getStatusCode()); + + + Response createSubDVToBeFeatured = UtilIT.createSubDataverse(UtilIT.getRandomDvAlias() + "-feature", null, apiToken, dataverseAlias); + String subDataverseAlias = UtilIT.getAliasFromResponse(createSubDVToBeFeatured); + + //publish a sub dataverse so that the owner will have something to feature + Response createSubDVToBePublished = UtilIT.createSubDataverse(UtilIT.getRandomDvAlias() + "-pub", null, apiToken, dataverseAlias); + assertEquals(201, createSubDVToBePublished.getStatusCode()); + String subDataverseAliasPub = UtilIT.getAliasFromResponse(createSubDVToBePublished); + publishDataverse = UtilIT.publishDataverseViaNativeApi(subDataverseAliasPub, apiToken); + assertEquals(200, publishDataverse.getStatusCode()); + + //can't feature a dataverse that is unpublished + Response featureSubDVResponseUnpublished = UtilIT.addFeaturedDataverse(dataverseAlias, subDataverseAlias, apiToken); + featureSubDVResponseUnpublished.prettyPrint(); + assertEquals(400, featureSubDVResponseUnpublished.getStatusCode()); + featureSubDVResponseUnpublished.then().assertThat() + .body(containsString("may not be featured")); + + //can't feature a dataverse you don't own + Response featureSubDVResponseNotOwned = UtilIT.addFeaturedDataverse(dataverseAlias, "root", apiToken); + featureSubDVResponseNotOwned.prettyPrint(); + assertEquals(400, featureSubDVResponseNotOwned.getStatusCode()); + featureSubDVResponseNotOwned.then().assertThat() + .body(containsString("may not be featured")); + + //can't feature a dataverse that doesn't exist + Response featureSubDVResponseNotExist = UtilIT.addFeaturedDataverse(dataverseAlias, "dummy-alias-sek-foobar-333", apiToken); + 
featureSubDVResponseNotExist.prettyPrint();
+        assertEquals(400, featureSubDVResponseNotExist.getStatusCode());
+        featureSubDVResponseNotExist.then().assertThat()
+                .body(containsString("Can't find dataverse collection"));
+
+        publishDataverse = UtilIT.publishDataverseViaNativeApi(subDataverseAlias, apiToken);
+        assertEquals(200, publishDataverse.getStatusCode());
+
+        //once published it should work
+        Response featureSubDVResponse = UtilIT.addFeaturedDataverse(dataverseAlias, subDataverseAlias, apiToken);
+        featureSubDVResponse.prettyPrint();
+        assertEquals(OK.getStatusCode(), featureSubDVResponse.getStatusCode());
+
+
+        Response getFeaturedDataverseResponse = UtilIT.getFeaturedDataverses(dataverseAlias, apiToken);
+        getFeaturedDataverseResponse.prettyPrint();
+        assertEquals(OK.getStatusCode(), getFeaturedDataverseResponse.getStatusCode());
+        getFeaturedDataverseResponse.then().assertThat()
+                .body("data[0]", equalTo(subDataverseAlias));
+
+        Response deleteFeaturedDataverseResponse = UtilIT.deleteFeaturedDataverses(dataverseAlias, apiToken);
+        deleteFeaturedDataverseResponse.prettyPrint();
+
+        assertEquals(OK.getStatusCode(), deleteFeaturedDataverseResponse.getStatusCode());
+        deleteFeaturedDataverseResponse.then().assertThat()
+                .body(containsString("Featured dataverses have been removed"));
+
+        Response deleteSubCollectionResponse = UtilIT.deleteDataverse(subDataverseAlias, apiToken);
+        deleteSubCollectionResponse.prettyPrint();
+        assertEquals(OK.getStatusCode(), deleteSubCollectionResponse.getStatusCode());
+
+        Response deleteSubCollectionPubResponse = UtilIT.deleteDataverse(subDataverseAliasPub, apiToken);
+        deleteSubCollectionPubResponse.prettyPrint();
+        assertEquals(OK.getStatusCode(), deleteSubCollectionPubResponse.getStatusCode());
+
+        Response deleteCollectionResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken);
+        deleteCollectionResponse.prettyPrint();
+        assertEquals(OK.getStatusCode(), deleteCollectionResponse.getStatusCode());
+    }
+
+    @Test
+    public void testUpdateInputLevels() {
+        Response createUserResponse = UtilIT.createRandomUser();
+        String apiToken = UtilIT.getApiTokenFromResponse(createUserResponse);
+
+        Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
+        createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode());
+        String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
+
+        // Update valid input levels
+        String[] testInputLevelNames = {"geographicCoverage", "country"};
+        boolean[] testRequiredInputLevels = {true, false};
+        boolean[] testIncludedInputLevels = {true, false};
+        Response updateDataverseInputLevelsResponse = UtilIT.updateDataverseInputLevels(dataverseAlias, testInputLevelNames, testRequiredInputLevels, testIncludedInputLevels, apiToken);
+        String actualInputLevelName = updateDataverseInputLevelsResponse.then().extract().path("data.inputLevels[0].datasetFieldTypeName");
+        int geographicCoverageInputLevelIndex = actualInputLevelName.equals("geographicCoverage") ? 
0 : 1; + updateDataverseInputLevelsResponse.then().assertThat() + .body(String.format("data.inputLevels[%d].include", geographicCoverageInputLevelIndex), equalTo(true)) + .body(String.format("data.inputLevels[%d].required", geographicCoverageInputLevelIndex), equalTo(true)) + .body(String.format("data.inputLevels[%d].include", 1 - geographicCoverageInputLevelIndex), equalTo(false)) + .body(String.format("data.inputLevels[%d].required", 1 - geographicCoverageInputLevelIndex), equalTo(false)) + .statusCode(OK.getStatusCode()); + String actualFieldTypeName1 = updateDataverseInputLevelsResponse.then().extract().path("data.inputLevels[0].datasetFieldTypeName"); + String actualFieldTypeName2 = updateDataverseInputLevelsResponse.then().extract().path("data.inputLevels[1].datasetFieldTypeName"); + assertNotEquals(actualFieldTypeName1, actualFieldTypeName2); + assertThat(testInputLevelNames, hasItemInArray(actualFieldTypeName1)); + assertThat(testInputLevelNames, hasItemInArray(actualFieldTypeName2)); + + // Update input levels with an invalid field type name + String[] testInvalidInputLevelNames = {"geographicCoverage", "invalid1"}; + updateDataverseInputLevelsResponse = UtilIT.updateDataverseInputLevels(dataverseAlias, testInvalidInputLevelNames, testRequiredInputLevels, testIncludedInputLevels, apiToken); + updateDataverseInputLevelsResponse.then().assertThat() + .body("message", equalTo("Invalid dataset field type name: invalid1")) + .statusCode(BAD_REQUEST.getStatusCode()); + + // Update invalid empty input levels + testInputLevelNames = new String[]{}; + updateDataverseInputLevelsResponse = UtilIT.updateDataverseInputLevels(dataverseAlias, testInputLevelNames, testRequiredInputLevels, testIncludedInputLevels, apiToken); + updateDataverseInputLevelsResponse.then().assertThat() + .body("message", equalTo("Error while updating dataverse input levels: Input level list cannot be null or empty")) + .statusCode(INTERNAL_SERVER_ERROR.getStatusCode()); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java index 022747a3cdc..22abf6fa2e3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ExternalToolsIT.java @@ -40,21 +40,6 @@ public void testGetExternalTools() { @Test public void testFileLevelTool1() { - // Delete all external tools before testing. 
- Response getTools = UtilIT.getExternalTools(); - getTools.prettyPrint(); - getTools.then().assertThat() - .statusCode(OK.getStatusCode()); - String body = getTools.getBody().asString(); - JsonReader bodyObject = Json.createReader(new StringReader(body)); - JsonArray tools = bodyObject.readObject().getJsonArray("data"); - for (int i = 0; i < tools.size(); i++) { - JsonObject tool = tools.getJsonObject(i); - int id = tool.getInt("id"); - Response deleteExternalTool = UtilIT.deleteExternalTool(id); - deleteExternalTool.prettyPrint(); - } - Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); createUser.then().assertThat() @@ -116,7 +101,7 @@ public void testFileLevelTool1() { .statusCode(OK.getStatusCode()) .body("data.displayName", CoreMatchers.equalTo("AwesomeTool")); - long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + Long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); Response getTool = UtilIT.getExternalTool(toolId); getTool.prettyPrint(); @@ -130,14 +115,17 @@ public void testFileLevelTool1() { .statusCode(BAD_REQUEST.getStatusCode()) .body("message", CoreMatchers.equalTo("Type must be one of these values: [explore, configure, preview, query].")); - Response getExternalToolsForTabularFiles = UtilIT.getExternalToolsForFile(tabularFileId.toString(), "explore", apiToken); + // Getting tool by tool Id to avoid issue where there are existing tools + String toolIdString = toolId.toString(); + Response getExternalToolsForTabularFiles = UtilIT.getExternalToolForFileById(tabularFileId.toString(), "explore", apiToken, toolIdString); getExternalToolsForTabularFiles.prettyPrint(); + getExternalToolsForTabularFiles.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data[0].displayName", CoreMatchers.equalTo("AwesomeTool")) - .body("data[0].scope", CoreMatchers.equalTo("file")) - .body("data[0].contentType", CoreMatchers.equalTo("text/tab-separated-values")) - .body("data[0].toolUrlWithQueryParams", CoreMatchers.equalTo("http://awesometool.com?fileid=" + tabularFileId + "&key=" + apiToken)); + .body("data.displayName", CoreMatchers.equalTo("AwesomeTool")) + .body("data.scope", CoreMatchers.equalTo("file")) + .body("data.contentType", CoreMatchers.equalTo("text/tab-separated-values")) + .body("data.toolUrlWithQueryParams", CoreMatchers.equalTo("http://awesometool.com?fileid=" + tabularFileId + "&key=" + apiToken)); Response getExternalToolsForJuptyerNotebooks = UtilIT.getExternalToolsForFile(jupyterNotebookFileId.toString(), "explore", apiToken); getExternalToolsForJuptyerNotebooks.prettyPrint(); @@ -145,26 +133,16 @@ public void testFileLevelTool1() { .statusCode(OK.getStatusCode()) // No tools for this file type. .body("data", Matchers.hasSize(0)); + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); } @Test public void testDatasetLevelTool1() { - // Delete all external tools before testing. 
- Response getTools = UtilIT.getExternalTools(); - getTools.prettyPrint(); - getTools.then().assertThat() - .statusCode(OK.getStatusCode()); - String body = getTools.getBody().asString(); - JsonReader bodyObject = Json.createReader(new StringReader(body)); - JsonArray tools = bodyObject.readObject().getJsonArray("data"); - for (int i = 0; i < tools.size(); i++) { - JsonObject tool = tools.getJsonObject(i); - int id = tool.getInt("id"); - Response deleteExternalTool = UtilIT.deleteExternalTool(id); - deleteExternalTool.prettyPrint(); - } - Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); createUser.then().assertThat() @@ -184,7 +162,6 @@ public void testDatasetLevelTool1() { createDataset.then().assertThat() .statusCode(CREATED.getStatusCode()); -// Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset); Integer datasetId = JsonPath.from(createDataset.getBody().asString()).getInt("data.id"); String datasetPid = JsonPath.from(createDataset.getBody().asString()).getString("data.persistentId"); @@ -219,6 +196,8 @@ public void testDatasetLevelTool1() { addExternalTool.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.displayName", CoreMatchers.equalTo("DatasetTool1")); + + Long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); Response getExternalToolsByDatasetIdInvalidType = UtilIT.getExternalToolsForDataset(datasetId.toString(), "invalidType", apiToken); getExternalToolsByDatasetIdInvalidType.prettyPrint(); @@ -226,34 +205,23 @@ public void testDatasetLevelTool1() { .statusCode(BAD_REQUEST.getStatusCode()) .body("message", CoreMatchers.equalTo("Type must be one of these values: [explore, configure, preview, query].")); - Response getExternalToolsByDatasetId = UtilIT.getExternalToolsForDataset(datasetId.toString(), "explore", apiToken); + Response getExternalToolsByDatasetId = UtilIT.getExternalToolForDatasetById(datasetId.toString(), "explore", apiToken, toolId.toString()); getExternalToolsByDatasetId.prettyPrint(); getExternalToolsByDatasetId.then().assertThat() - .body("data[0].displayName", CoreMatchers.equalTo("DatasetTool1")) - .body("data[0].scope", CoreMatchers.equalTo("dataset")) - .body("data[0].toolUrlWithQueryParams", CoreMatchers.equalTo("http://datasettool1.com?datasetPid=" + datasetPid + "&key=" + apiToken)) + .body("data.displayName", CoreMatchers.equalTo("DatasetTool1")) + .body("data.scope", CoreMatchers.equalTo("dataset")) + .body("data.toolUrlWithQueryParams", CoreMatchers.equalTo("http://datasettool1.com?datasetPid=" + datasetPid + "&key=" + apiToken)) + .statusCode(OK.getStatusCode()); + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() .statusCode(OK.getStatusCode()); - } @Test public void testDatasetLevelToolConfigure() { - // Delete all external tools before testing. 
- Response getTools = UtilIT.getExternalTools(); - getTools.prettyPrint(); - getTools.then().assertThat() - .statusCode(OK.getStatusCode()); - String body = getTools.getBody().asString(); - JsonReader bodyObject = Json.createReader(new StringReader(body)); - JsonArray tools = bodyObject.readObject().getJsonArray("data"); - for (int i = 0; i < tools.size(); i++) { - JsonObject tool = tools.getJsonObject(i); - int id = tool.getInt("id"); - Response deleteExternalTool = UtilIT.deleteExternalTool(id); - deleteExternalTool.prettyPrint(); - } - Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); createUser.then().assertThat() @@ -302,14 +270,20 @@ public void testDatasetLevelToolConfigure() { addExternalTool.then().assertThat() .statusCode(OK.getStatusCode()) .body("data.displayName", CoreMatchers.equalTo("Dataset Configurator")); - - Response getExternalToolsByDatasetId = UtilIT.getExternalToolsForDataset(datasetId.toString(), "configure", apiToken); + + Long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + Response getExternalToolsByDatasetId = UtilIT.getExternalToolForDatasetById(datasetId.toString(), "configure", apiToken, toolId.toString()); getExternalToolsByDatasetId.prettyPrint(); getExternalToolsByDatasetId.then().assertThat() - .body("data[0].displayName", CoreMatchers.equalTo("Dataset Configurator")) - .body("data[0].scope", CoreMatchers.equalTo("dataset")) - .body("data[0].types[0]", CoreMatchers.equalTo("configure")) - .body("data[0].toolUrlWithQueryParams", CoreMatchers.equalTo("https://datasetconfigurator.com?datasetPid=" + datasetPid)) + .body("data.displayName", CoreMatchers.equalTo("Dataset Configurator")) + .body("data.scope", CoreMatchers.equalTo("dataset")) + .body("data.types[0]", CoreMatchers.equalTo("configure")) + .body("data.toolUrlWithQueryParams", CoreMatchers.equalTo("https://datasetconfigurator.com?datasetPid=" + datasetPid)) + .statusCode(OK.getStatusCode()); + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() .statusCode(OK.getStatusCode()); } @@ -400,12 +374,13 @@ public void deleteTools() { String body = getTools.getBody().asString(); JsonReader bodyObject = Json.createReader(new StringReader(body)); JsonArray tools = bodyObject.readObject().getJsonArray("data"); + /* for (int i = 0; i < tools.size(); i++) { JsonObject tool = tools.getJsonObject(i); int id = tool.getInt("id"); Response deleteExternalTool = UtilIT.deleteExternalTool(id); deleteExternalTool.prettyPrint(); - } + }*/ } // preview only @@ -446,12 +421,20 @@ public void createToolShellScript() { addExternalTool.prettyPrint(); addExternalTool.then().assertThat() .statusCode(OK.getStatusCode()); + + long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + + //Delete the tool added by this test... 
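+        // (Each test now cleans up only the tool it created, keyed by the id
+        // captured at creation time, rather than deleting every registered
+        // external tool up front as before.)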
+ Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); } // explore only @Disabled @Test public void createToolDataExplorer() { + /* JsonObjectBuilder job = Json.createObjectBuilder(); job.add("displayName", "Data Explorer"); job.add("description", ""); @@ -479,6 +462,14 @@ public void createToolDataExplorer() { addExternalTool.prettyPrint(); addExternalTool.then().assertThat() .statusCode(OK.getStatusCode()); + + long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + + //Delete the tool added by this test... + Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); + */ } // both preview and explore @@ -527,21 +518,6 @@ public void createToolSpreadsheetViewer() { @Test public void testFileLevelToolWithAuxFileReq() throws IOException { - // Delete all external tools before testing. - Response getTools = UtilIT.getExternalTools(); - getTools.prettyPrint(); - getTools.then().assertThat() - .statusCode(OK.getStatusCode()); - String body = getTools.getBody().asString(); - JsonReader bodyObject = Json.createReader(new StringReader(body)); - JsonArray tools = bodyObject.readObject().getJsonArray("data"); - for (int i = 0; i < tools.size(); i++) { - JsonObject tool = tools.getJsonObject(i); - int id = tool.getInt("id"); - Response deleteExternalTool = UtilIT.deleteExternalTool(id); - deleteExternalTool.prettyPrint(); - } - Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); createUser.then().assertThat() @@ -617,7 +593,7 @@ public void testFileLevelToolWithAuxFileReq() throws IOException { .statusCode(OK.getStatusCode()) .body("data.displayName", CoreMatchers.equalTo("HDF5 Tool")); - long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); + Long toolId = JsonPath.from(addExternalTool.getBody().asString()).getLong("data.id"); Response getTool = UtilIT.getExternalTool(toolId); getTool.prettyPrint(); @@ -633,13 +609,19 @@ public void testFileLevelToolWithAuxFileReq() throws IOException { .body("data", Matchers.hasSize(0)); // The tool shows for a true HDF5 file. The NcML aux file is available. Requirements met. - Response getToolsForTrueHdf5 = UtilIT.getExternalToolsForFile(trueHdf5.toString(), "preview", apiToken); + Response getToolsForTrueHdf5 = UtilIT.getExternalToolForFileById(trueHdf5.toString(), "preview", apiToken, toolId.toString()); getToolsForTrueHdf5.prettyPrint(); getToolsForTrueHdf5.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data[0].displayName", CoreMatchers.equalTo("HDF5 Tool")) - .body("data[0].scope", CoreMatchers.equalTo("file")) - .body("data[0].contentType", CoreMatchers.equalTo("application/x-hdf5")); + .body("data.displayName", CoreMatchers.equalTo("HDF5 Tool")) + .body("data.scope", CoreMatchers.equalTo("file")) + .body("data.contentType", CoreMatchers.equalTo("application/x-hdf5")); + + //Delete the tool added by this test... 
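+        // (Scoped cleanup, as above: deleting by this tool's id leaves any
+        // pre-existing tools on the instance untouched.)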
+ Response deleteExternalTool = UtilIT.deleteExternalTool(toolId); + deleteExternalTool.then().assertThat() + .statusCode(OK.getStatusCode()); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java index 16726485dee..e3c26284d55 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/FilesIT.java @@ -11,12 +11,14 @@ import org.junit.jupiter.api.BeforeAll; import io.restassured.path.json.JsonPath; -import static edu.harvard.iq.dataverse.api.ApiConstants.DS_VERSION_DRAFT; +import static edu.harvard.iq.dataverse.api.ApiConstants.*; import static io.restassured.path.json.JsonPath.with; import io.restassured.path.xml.XmlPath; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.FileUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.File; import java.io.IOException; @@ -33,6 +35,9 @@ import jakarta.json.JsonObjectBuilder; import static jakarta.ws.rs.core.Response.Status.*; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.time.Year; import org.hamcrest.CoreMatchers; import org.hamcrest.Matchers; import org.junit.jupiter.api.AfterAll; @@ -1036,7 +1041,7 @@ public void testRestrictFile() { } - @Test + @Test public void testRestrictAddedFile() { msgt("testRestrictAddedFile"); @@ -1141,9 +1146,6 @@ public void testAccessFacet() { UtilIT.setSetting(SettingsServiceBean.Key.PublicInstall, "false"); } - - - @Test public void test_AddFileBadUploadFormat() { @@ -1398,16 +1400,288 @@ public void testDataSizeInDataverse() throws InterruptedException { assertEquals(magicControlString, JsonPath.from(datasetDownloadSizeResponse.body().asString()).getString("data.message")); } - + @Test public void testGetFileInfo() { + Response createUser = UtilIT.createRandomUser(); + String superUserUsername = UtilIT.getUsernameFromResponse(createUser); + String superUserApiToken = UtilIT.getApiTokenFromResponse(createUser); + UtilIT.makeSuperUser(superUserUsername); + String dataverseAlias = createDataverseGetAlias(superUserApiToken); + Integer datasetId = createDatasetGetId(dataverseAlias, superUserApiToken); + + createUser = UtilIT.createRandomUser(); + String regularUsername = UtilIT.getUsernameFromResponse(createUser); + String regularApiToken = UtilIT.getApiTokenFromResponse(createUser); + + msg("Add a non-tabular file"); + String pathToFile = "scripts/search/data/binary/trees.png"; + Response addResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, superUserApiToken); + + // The following tests cover cases where no version ID is specified in the endpoint + // Superuser should get to see draft file data + String dataFileId = addResponse.getBody().jsonPath().getString("data.files[0].dataFile.id"); + Response getFileDataResponse = UtilIT.getFileData(dataFileId, superUserApiToken); + String newFileName = "trees.png"; + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileName)) + .body("data.dataFile.filename", equalTo(newFileName)) + .body("data.dataFile.contentType", equalTo("image/png")) + .body("data.dataFile.filesize", equalTo(8361)) + .statusCode(OK.getStatusCode()); + + // Regular user should not get to see draft file data + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken); + 
getFileDataResponse.then().assertThat()
+                .statusCode(UNAUTHORIZED.getStatusCode());
+
+        // Publish dataverse and dataset
+        Response publishDataverseResp = UtilIT.publishDataverseViaSword(dataverseAlias, superUserApiToken);
+        publishDataverseResp.then().assertThat()
+                .statusCode(OK.getStatusCode());
+
+        Response publishDatasetResp = UtilIT.publishDatasetViaNativeApi(datasetId, "major", superUserApiToken);
+        publishDatasetResp.then().assertThat()
+                .statusCode(OK.getStatusCode());
+
+        // Regular user should get to see published file data
+        getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken);
+        getFileDataResponse.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data.label", equalTo(newFileName));
+
+        // The following tests cover cases where a version ID is specified in the endpoint
+        // Superuser should not get to see draft file data when no draft version exists
+        getFileDataResponse = UtilIT.getFileData(dataFileId, superUserApiToken, DS_VERSION_DRAFT);
+        getFileDataResponse.then().assertThat()
+                .statusCode(NOT_FOUND.getStatusCode());
+
+        // Superuser should get to see file data from a specific published version, filtering by version tag
+        getFileDataResponse = UtilIT.getFileData(dataFileId, superUserApiToken, "1.0");
+        getFileDataResponse.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data.label", equalTo(newFileName));
+
+        // Update the file metadata
+        String newFileNameFirstUpdate = "trees_2.png";
+        JsonObjectBuilder updateFileMetadata = Json.createObjectBuilder()
+                .add("label", newFileNameFirstUpdate);
+        Response updateFileMetadataResponse = UtilIT.updateFileMetadata(dataFileId, updateFileMetadata.build().toString(), superUserApiToken);
+        updateFileMetadataResponse.then().statusCode(OK.getStatusCode());
+
+        // Superuser should get to see draft file data
+        getFileDataResponse = UtilIT.getFileData(dataFileId, superUserApiToken, DS_VERSION_DRAFT);
+        getFileDataResponse.then().assertThat()
+                .statusCode(OK.getStatusCode());
+
+        // Regular user should not get to see draft file data
+        getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, DS_VERSION_DRAFT);
+        getFileDataResponse.then().assertThat()
+                .statusCode(UNAUTHORIZED.getStatusCode());
+
+        // Publish dataset once again
+        publishDatasetResp = UtilIT.publishDatasetViaNativeApi(datasetId, "major", superUserApiToken);
+        publishDatasetResp.then().assertThat()
+                .statusCode(OK.getStatusCode());
+
+        // Update the file metadata once again
+        String newFileNameSecondUpdate = "trees_3.png";
+        updateFileMetadata = Json.createObjectBuilder()
+                .add("label", newFileNameSecondUpdate);
+        updateFileMetadataResponse = UtilIT.updateFileMetadata(dataFileId, updateFileMetadata.build().toString(), superUserApiToken);
+        updateFileMetadataResponse.then().statusCode(OK.getStatusCode());
+
+        // Regular user should get to see latest published file data
+        getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, DS_VERSION_LATEST_PUBLISHED);
+        getFileDataResponse.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data.label", equalTo(newFileNameFirstUpdate));
+
+        // Regular user should get to see latest published file data if latest is requested
+        getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, DS_VERSION_LATEST);
+        getFileDataResponse.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data.label", equalTo(newFileNameFirstUpdate));
+
+        // Superuser should get to see draft file data if latest is requested
+        getFileDataResponse = UtilIT.getFileData(dataFileId, 
superUserApiToken, DS_VERSION_LATEST); + getFileDataResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.label", equalTo(newFileNameSecondUpdate)); + + // Publish dataset once again + publishDatasetResp = UtilIT.publishDatasetViaNativeApi(datasetId, "major", superUserApiToken); + publishDatasetResp.then().assertThat() + .statusCode(OK.getStatusCode()); + + // Regular user should get to see file data by specific version number + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, "2.0"); + getFileDataResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.label", equalTo(newFileNameFirstUpdate)); + + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, "3.0"); + getFileDataResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.label", equalTo(newFileNameSecondUpdate)); + + // The following tests cover cases where the dataset version is deaccessioned + + Response deaccessionDatasetResponse = UtilIT.deaccessionDataset(datasetId, "3.0", "Test reason", null, superUserApiToken); + deaccessionDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Superuser should get to see file data if the latest version is deaccessioned filtering by latest and includeDeaccessioned is true + getFileDataResponse = UtilIT.getFileData(dataFileId, superUserApiToken, DS_VERSION_LATEST, true, false); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameSecondUpdate)) + .statusCode(OK.getStatusCode()); + + // Superuser should get to see file data if the latest version is deaccessioned filtering by latest published and includeDeaccessioned is true + getFileDataResponse = UtilIT.getFileData(dataFileId, superUserApiToken, DS_VERSION_LATEST_PUBLISHED, true, false); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameSecondUpdate)) + .statusCode(OK.getStatusCode()); + + // Superuser should get to see version 2.0 file data if the latest version is deaccessioned filtering by latest and includeDeaccessioned is false + getFileDataResponse = UtilIT.getFileData(dataFileId, superUserApiToken, DS_VERSION_LATEST, false, false); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameFirstUpdate)) + .statusCode(OK.getStatusCode()); + + // Superuser should get to see version 2.0 file data if the latest version is deaccessioned filtering by latest published and includeDeaccessioned is false + getFileDataResponse = UtilIT.getFileData(dataFileId, superUserApiToken, DS_VERSION_LATEST_PUBLISHED, false, false); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameFirstUpdate)) + .statusCode(OK.getStatusCode()); + + // Superuser should get to see file data from specific deaccessioned version filtering by tag and includeDeaccessioned is true + getFileDataResponse = UtilIT.getFileData(dataFileId, superUserApiToken, "3.0", true, false); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameSecondUpdate)) + .statusCode(OK.getStatusCode()); + + // Superuser should not get to see file data from specific deaccessioned version filtering by tag and includeDeaccessioned is false + getFileDataResponse = UtilIT.getFileData(dataFileId, superUserApiToken, "3.0", false, false); + getFileDataResponse.then().assertThat() + .statusCode(NOT_FOUND.getStatusCode()); + + // Regular user should get to see version 2.0 file data if the latest version is deaccessioned filtering by latest and 
includeDeaccessioned is true + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, DS_VERSION_LATEST, true, false); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameFirstUpdate)) + .statusCode(OK.getStatusCode()); + + // Regular user should get to see version 2.0 file data if the latest version is deaccessioned filtering by latest published and includeDeaccessioned is true + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, DS_VERSION_LATEST_PUBLISHED, true, false); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameFirstUpdate)) + .statusCode(OK.getStatusCode()); + + // Regular user should get to see version 2.0 file data if the latest version is deaccessioned filtering by latest published and includeDeaccessioned is false + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, DS_VERSION_LATEST_PUBLISHED, false, false); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameFirstUpdate)) + .statusCode(OK.getStatusCode()); + + // Regular user should not get to see file data from specific deaccessioned version filtering by tag and includeDeaccessioned is true + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, "3.0", true, false); + getFileDataResponse.then().assertThat() + .statusCode(NOT_FOUND.getStatusCode()); + + // Regular user should not get to see file data from specific deaccessioned version filtering by tag and includeDeaccessioned is false + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, "3.0", false, false); + getFileDataResponse.then().assertThat() + .statusCode(NOT_FOUND.getStatusCode()); + + // Update the file metadata + String newFileNameThirdUpdate = "trees_4.png"; + updateFileMetadata = Json.createObjectBuilder() + .add("label", newFileNameThirdUpdate); + updateFileMetadataResponse = UtilIT.updateFileMetadata(dataFileId, updateFileMetadata.build().toString(), superUserApiToken); + updateFileMetadataResponse.then().statusCode(OK.getStatusCode()); + + // Superuser should get to see draft file data if draft exists filtering by latest and includeDeaccessioned is true + getFileDataResponse = UtilIT.getFileData(dataFileId, superUserApiToken, DS_VERSION_LATEST, true, false); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameThirdUpdate)) + .statusCode(OK.getStatusCode()); + + // Superuser should get to see latest published file data if draft exists filtering by latest published and includeDeaccessioned is true + getFileDataResponse = UtilIT.getFileData(dataFileId, superUserApiToken, DS_VERSION_LATEST_PUBLISHED, true, false); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameSecondUpdate)) + .statusCode(OK.getStatusCode()); + + // Regular user should get to see version 2.0 file data if the latest version is deaccessioned and draft exists filtering by latest and includeDeaccessioned is true + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, DS_VERSION_LATEST, true, false); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameFirstUpdate)) + .statusCode(OK.getStatusCode()); + + // Regular user should get to see version 2.0 file data if the latest version is deaccessioned and draft exists filtering by latest published and includeDeaccessioned is true + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, DS_VERSION_LATEST_PUBLISHED, true, false); + 
getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameFirstUpdate)) + .statusCode(OK.getStatusCode()); + + // Publish dataset once again + publishDatasetResp = UtilIT.publishDatasetViaNativeApi(datasetId, "major", superUserApiToken); + publishDatasetResp.then().assertThat() + .statusCode(OK.getStatusCode()); + + // Regular user should get to see file data if the latest version is not deaccessioned filtering by latest and includeDeaccessioned is true + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, DS_VERSION_LATEST, true, false); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameThirdUpdate)) + .statusCode(OK.getStatusCode()); + + // Regular user should get to see file data if the latest version is not deaccessioned filtering by latest published and includeDeaccessioned is true + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, DS_VERSION_LATEST_PUBLISHED, true, false); + getFileDataResponse.then().assertThat() + .body("data.label", equalTo(newFileNameThirdUpdate)) + .statusCode(OK.getStatusCode()); + + // The following tests cover cases where the user requests to include the dataset version information in the response + // User should get to see dataset version info in the response if returnDatasetVersion is true + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, "1.0", false, true); + getFileDataResponse.then().assertThat() + .body("data.datasetVersion.versionState", equalTo("RELEASED")) + .statusCode(OK.getStatusCode()); + + // User should not get to see dataset version info in the response if returnDatasetVersion is false + getFileDataResponse = UtilIT.getFileData(dataFileId, regularApiToken, "1.0", false, false); + getFileDataResponse.then().assertThat() + .body("data.datasetVersion", equalTo(null)) + .statusCode(OK.getStatusCode()); + + // Cleanup + Response destroyDatasetResponse = UtilIT.destroyDataset(datasetId, superUserApiToken); + destroyDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, superUserApiToken); + deleteDataverseResponse.then().assertThat().statusCode(OK.getStatusCode()); + Response deleteUserResponse = UtilIT.deleteUser(superUserUsername); + deleteUserResponse.then().assertThat().statusCode(OK.getStatusCode()); + + deleteUserResponse = UtilIT.deleteUser(regularUsername); + deleteUserResponse.then().assertThat().statusCode(OK.getStatusCode()); + } + + @Test + public void testGetFileOwners() { Response createUser = UtilIT.createRandomUser(); String username = UtilIT.getUsernameFromResponse(createUser); String apiToken = UtilIT.getApiTokenFromResponse(createUser); Response makeSuperUser = UtilIT.makeSuperUser(username); String dataverseAlias = createDataverseGetAlias(apiToken); - Integer datasetId = createDatasetGetId(dataverseAlias, apiToken); + + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.prettyPrint(); + Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + + String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); createUser = UtilIT.createRandomUser(); String apiTokenRegular = UtilIT.getApiTokenFromResponse(createUser); @@ -1421,7 +1695,7 @@ public void testGetFileInfo() { addResponse.prettyPrint(); - Response getFileDataResponse = UtilIT.getFileData(dataFileId, apiToken); + Response 
getFileDataResponse = UtilIT.getFileWithOwners(dataFileId, apiToken, true); getFileDataResponse.prettyPrint(); getFileDataResponse.then().assertThat() @@ -1431,9 +1705,8 @@ public void testGetFileInfo() { .body("data.dataFile.filesize", equalTo(8361)) .statusCode(OK.getStatusCode()); - getFileDataResponse = UtilIT.getFileData(dataFileId, apiTokenRegular); - getFileDataResponse.then().assertThat() - .statusCode(BAD_REQUEST.getStatusCode()); + getFileDataResponse.then().assertThat().body("data.dataFile.isPartOf.identifier", equalTo(datasetId)); + getFileDataResponse.then().assertThat().body("data.dataFile.isPartOf.persistentIdentifier", equalTo(datasetPid)); // ------------------------- // Publish dataverse and dataset @@ -1452,6 +1725,7 @@ public void testGetFileInfo() { .statusCode(OK.getStatusCode()); //cleanup + Response destroyDatasetResponse = UtilIT.destroyDataset(datasetId, apiToken); assertEquals(200, destroyDatasetResponse.getStatusCode()); @@ -1460,6 +1734,8 @@ public void testGetFileInfo() { Response deleteUserResponse = UtilIT.deleteUser(username); assertEquals(200, deleteUserResponse.getStatusCode()); + + } @Test @@ -2362,4 +2638,360 @@ public void testGetHasBeenDeleted() { fileHasBeenDeleted = JsonPath.from(getHasBeenDeletedResponse.body().asString()).getBoolean("data"); assertTrue(fileHasBeenDeleted); } + + @Test + public void testCollectionStorageQuotas() { + // A minimal storage quota functionality test: + // - We create a collection and define a storage quota + // - We configure Dataverse to enforce it + // - We confirm that we can upload a file with the size under the quota + // - We confirm that we cannot upload a file once the quota is reached + // - We disable the quota on the collection via the API + + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String username = UtilIT.getUsernameFromResponse(createUser); + Response makeSuperUser = UtilIT.makeSuperUser(username); + assertEquals(200, makeSuperUser.getStatusCode()); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + + System.out.println("dataset id: "+datasetId); + + Response checkQuotaResponse = UtilIT.checkCollectionQuota(dataverseAlias, apiToken); + checkQuotaResponse.then().assertThat().statusCode(OK.getStatusCode()); + // This brand new collection shouldn't have any quota defined yet: + assertEquals(BundleUtil.getStringFromBundle("dataverse.storage.quota.notdefined"), JsonPath.from(checkQuotaResponse.body().asString()).getString("data.message")); + + // Set quota to 1K: + Response setQuotaResponse = UtilIT.setCollectionQuota(dataverseAlias, 1024, apiToken); + setQuotaResponse.then().assertThat().statusCode(OK.getStatusCode()); + assertEquals(BundleUtil.getStringFromBundle("dataverse.storage.quota.updated"), JsonPath.from(setQuotaResponse.body().asString()).getString("data.message")); + + // Check again: + checkQuotaResponse = UtilIT.checkCollectionQuota(dataverseAlias, apiToken); + 
checkQuotaResponse.then().assertThat().statusCode(OK.getStatusCode()); + String expectedApiMessage = BundleUtil.getStringFromBundle("dataverse.storage.quota.allocation", Arrays.asList("1,024")); + assertEquals(expectedApiMessage, JsonPath.from(checkQuotaResponse.body().asString()).getString("data.message")); + + System.out.println(expectedApiMessage); + + UtilIT.enableSetting(SettingsServiceBean.Key.UseStorageQuotas); + + String pathToFile306bytes = "src/test/resources/FileRecordJobIT.properties"; + String pathToFile1787bytes = "src/test/resources/datacite.xml"; + + // Upload a small file: + + Response uploadResponse = UtilIT.uploadFileViaNative(Integer.toString(datasetId), pathToFile306bytes, Json.createObjectBuilder().build(), apiToken); + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Check the recorded storage use: + + Response checkStorageUseResponse = UtilIT.checkCollectionStorageUse(dataverseAlias, apiToken); + checkStorageUseResponse.then().assertThat().statusCode(OK.getStatusCode()); + expectedApiMessage = BundleUtil.getStringFromBundle("dataverse.storage.use", Arrays.asList("306")); + assertEquals(expectedApiMessage, JsonPath.from(checkStorageUseResponse.body().asString()).getString("data.message")); + + System.out.println(expectedApiMessage); + + // Attempt to upload the second file - this should get us over the quota, + // so it should be rejected: + + uploadResponse = UtilIT.uploadFileViaNative(Integer.toString(datasetId), pathToFile1787bytes, Json.createObjectBuilder().build(), apiToken); + uploadResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + // We should get this error message made up from 2 Bundle strings: + expectedApiMessage = BundleUtil.getStringFromBundle("file.addreplace.error.ingest_create_file_err"); + expectedApiMessage = expectedApiMessage + " " + BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded", Arrays.asList("1.7 KB", "718 B")); + assertEquals(expectedApiMessage, JsonPath.from(uploadResponse.body().asString()).getString("message")); + + System.out.println(expectedApiMessage); + + // Check Storage Use again - should be unchanged: + + checkStorageUseResponse = UtilIT.checkCollectionStorageUse(dataverseAlias, apiToken); + checkStorageUseResponse.then().assertThat().statusCode(OK.getStatusCode()); + expectedApiMessage = BundleUtil.getStringFromBundle("dataverse.storage.use", Arrays.asList("306")); + assertEquals(expectedApiMessage, JsonPath.from(checkStorageUseResponse.body().asString()).getString("data.message")); + + // Disable the quota on the collection; try again: + + Response disableQuotaResponse = UtilIT.disableCollectionQuota(dataverseAlias, apiToken); + disableQuotaResponse.then().assertThat().statusCode(OK.getStatusCode()); + expectedApiMessage = BundleUtil.getStringFromBundle("dataverse.storage.quota.deleted"); + assertEquals(expectedApiMessage, JsonPath.from(disableQuotaResponse.body().asString()).getString("data.message")); + + // Check again: + + checkQuotaResponse = UtilIT.checkCollectionQuota(dataverseAlias, apiToken); + checkQuotaResponse.then().assertThat().statusCode(OK.getStatusCode()); + // ... 
should say "no quota", again: + assertEquals(BundleUtil.getStringFromBundle("dataverse.storage.quota.notdefined"), JsonPath.from(checkQuotaResponse.body().asString()).getString("data.message")); + + // And try to upload the larger file again: + + uploadResponse = UtilIT.uploadFileViaNative(Integer.toString(datasetId), pathToFile1787bytes, Json.createObjectBuilder().build(), apiToken); + // ... should work this time around: + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Let's confirm that the total storage use has been properly implemented: + + //try {sleep(1000);}catch(InterruptedException ie){} + + checkStorageUseResponse = UtilIT.checkCollectionStorageUse(dataverseAlias, apiToken); + checkStorageUseResponse.then().assertThat().statusCode(OK.getStatusCode()); + expectedApiMessage = BundleUtil.getStringFromBundle("dataverse.storage.use", Arrays.asList("2,093")); + assertEquals(expectedApiMessage, JsonPath.from(checkStorageUseResponse.body().asString()).getString("data.message")); + + System.out.println(expectedApiMessage); + + // @todo: a test for the storage use hierarchy? - create a couple of + // sub-collections, upload a file into a dataset in the farthest branch + // collection, make sure the usage has been incremented all the way up + // to the root? + + UtilIT.deleteSetting(SettingsServiceBean.Key.UseStorageQuotas); + } + + @Test + public void testIngestWithAndWithoutVariableHeader() throws NoSuchAlgorithmException { + msgt("testIngestWithAndWithoutVariableHeader"); + + // The compact Stata file we'll be using for this test: + // (this file is provided by Stata inc. - it's genuine quality) + String pathToFile = "scripts/search/data/tabular/stata13-auto.dta"; + // The pre-calculated MD5 signature of the *complete* tab-delimited + // file as seen by the final Access API user (i.e., with the variable + // header line in it): + String tabularFileMD5 = "f298c2567cc8eb544e36ad83edf6f595"; + // Expected byte sizes of the generated tab-delimited file as stored, + // with and without the header: + int tabularFileSizeWoutHeader = 4026; + int tabularFileSizeWithHeader = 4113; + + String apiToken = createUserGetToken(); + String dataverseAlias = createDataverseGetAlias(apiToken); + Integer datasetIdA = createDatasetGetId(dataverseAlias, apiToken); + + // Before we do anything else, make sure that the instance is configured + // the "old" way, i.e., to store ingested files without the headers: + UtilIT.deleteSetting(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders); + + Response addResponse = UtilIT.uploadFileViaNative(datasetIdA.toString(), pathToFile, apiToken); + addResponse.prettyPrint(); + + addResponse.then().assertThat() + .body("data.files[0].dataFile.contentType", equalTo("application/x-stata-13")) + .body("data.files[0].label", equalTo("stata13-auto.dta")) + .statusCode(OK.getStatusCode()); + + Long fileIdA = JsonPath.from(addResponse.body().asString()).getLong("data.files[0].dataFile.id"); + assertNotNull(fileIdA); + + // Give file time to ingest + assertTrue(UtilIT.sleepForLock(datasetIdA.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + pathToFile + "(A)"); + + // Check the metadata to confirm that the file has ingested: + + Response fileDataResponse = UtilIT.getFileData(fileIdA.toString(), apiToken); + fileDataResponse.prettyPrint(); + fileDataResponse.then().assertThat() + .body("data.dataFile.filename", equalTo("stata13-auto.tab")) + 
.body("data.dataFile.contentType", equalTo("text/tab-separated-values")) + .body("data.dataFile.filesize", equalTo(tabularFileSizeWoutHeader)) + .statusCode(OK.getStatusCode()); + + + // Download the file, verify the checksum: + + Response fileDownloadResponse = UtilIT.downloadFile(fileIdA.intValue(), apiToken); + fileDownloadResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + + byte[] fileDownloadBytes = fileDownloadResponse.body().asByteArray(); + MessageDigest messageDigest = MessageDigest.getInstance("MD5"); + messageDigest.update(fileDownloadBytes); + byte[] rawDigestBytes = messageDigest.digest(); + String tabularFileMD5calculated = FileUtil.checksumDigestToString(rawDigestBytes); + + msgt("md5 of the downloaded file (saved without the variable name header): "+tabularFileMD5calculated); + + assertEquals(tabularFileMD5, tabularFileMD5calculated); + + // Repeat the whole thing, in another dataset (because we will be uploading + // an identical file), but with the "store with the header setting enabled): + + UtilIT.enableSetting(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders); + + Integer datasetIdB = createDatasetGetId(dataverseAlias, apiToken); + + addResponse = UtilIT.uploadFileViaNative(datasetIdB.toString(), pathToFile, apiToken); + addResponse.prettyPrint(); + + addResponse.then().assertThat() + .body("data.files[0].dataFile.contentType", equalTo("application/x-stata-13")) + .body("data.files[0].label", equalTo("stata13-auto.dta")) + .statusCode(OK.getStatusCode()); + + Long fileIdB = JsonPath.from(addResponse.body().asString()).getLong("data.files[0].dataFile.id"); + assertNotNull(fileIdB); + + // Give file time to ingest + assertTrue(UtilIT.sleepForLock(datasetIdB.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if Ingest Lock exceeds max duration " + pathToFile + "(B)"); + + // Check the metadata to confirm that the file has ingested: + + fileDataResponse = UtilIT.getFileData(fileIdB.toString(), apiToken); + fileDataResponse.prettyPrint(); + fileDataResponse.then().assertThat() + .body("data.dataFile.filename", equalTo("stata13-auto.tab")) + .body("data.dataFile.contentType", equalTo("text/tab-separated-values")) + .body("data.dataFile.filesize", equalTo(tabularFileSizeWithHeader)) + .statusCode(OK.getStatusCode()); + + + // Download the file, verify the checksum, again + + fileDownloadResponse = UtilIT.downloadFile(fileIdB.intValue(), apiToken); + fileDownloadResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + + fileDownloadBytes = fileDownloadResponse.body().asByteArray(); + messageDigest.reset(); + messageDigest.update(fileDownloadBytes); + rawDigestBytes = messageDigest.digest(); + tabularFileMD5calculated = FileUtil.checksumDigestToString(rawDigestBytes); + + msgt("md5 of the downloaded file (saved with the variable name header): "+tabularFileMD5calculated); + + assertEquals(tabularFileMD5, tabularFileMD5calculated); + + // In other words, whether the file was saved with, or without the header, + // as downloaded by the user, the end result must be the same in both cases! + // In other words, whether that first line with the variable names is already + // in the physical file, or added by Dataverse on the fly, the downloaded + // content must be identical. + + UtilIT.deleteSetting(SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders); + + // @todo: cleanup? 
+ } + + + @Test + public void testFileCitationByVersion() throws IOException { + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(OK.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.prettyPrint(); + createDatasetResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + String datasetPid = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId"); + + String pathToTestFile = "src/test/resources/images/coffeeshop.png"; + Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToTestFile, Json.createObjectBuilder().build(), apiToken); + uploadFile.then().assertThat().statusCode(OK.getStatusCode()); + + Integer fileId = JsonPath.from(uploadFile.body().asString()).getInt("data.files[0].dataFile.id"); + + String pidAsUrl = "https://doi.org/" + datasetPid.split("doi:")[1]; + int currentYear = Year.now().getValue(); + + Response draftUnauthNoApitoken = UtilIT.getFileCitation(fileId, DS_VERSION_DRAFT, null); + draftUnauthNoApitoken.prettyPrint(); + draftUnauthNoApitoken.then().assertThat().statusCode(UNAUTHORIZED.getStatusCode()); + + Response createNoPermsUser = UtilIT.createRandomUser(); + createNoPermsUser.then().assertThat().statusCode(OK.getStatusCode()); + String noPermsApiToken = UtilIT.getApiTokenFromResponse(createNoPermsUser); + + Response draftUnauthNoPermsApiToken = UtilIT.getFileCitation(fileId, DS_VERSION_DRAFT, noPermsApiToken); + draftUnauthNoPermsApiToken.prettyPrint(); + draftUnauthNoPermsApiToken.then().assertThat().statusCode(UNAUTHORIZED.getStatusCode()); + + Response getFileCitationDraft = UtilIT.getFileCitation(fileId, DS_VERSION_DRAFT, apiToken); + getFileCitationDraft.prettyPrint(); + getFileCitationDraft.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", equalTo("Finch, Fiona, " + currentYear + ", \"Darwin's Finches\", " + pidAsUrl + ", Root, DRAFT VERSION; coffeeshop.png [fileName]")); + + Response publishDataverseResponse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); + publishDataverseResponse.then().assertThat().statusCode(OK.getStatusCode()); + + Response publishDatasetResponse = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken); + publishDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + Response publishedNoApiTokenNeeded = UtilIT.getFileCitation(fileId, "1.0", null); + publishedNoApiTokenNeeded.then().assertThat().statusCode(OK.getStatusCode()); + + Response publishedNoPermsApiTokenAllowed = UtilIT.getFileCitation(fileId, "1.0", noPermsApiToken); + publishedNoPermsApiTokenAllowed.then().assertThat().statusCode(OK.getStatusCode()); + + String updateJsonString = """ +{ + "label": "foo.png" +} +"""; + + Response updateMetadataResponse = UtilIT.updateFileMetadata(fileId.toString(), updateJsonString, apiToken); + updateMetadataResponse.prettyPrint(); + assertEquals(OK.getStatusCode(), updateMetadataResponse.getStatusCode()); + + Response getFileCitationPostV1Draft = UtilIT.getFileCitation(fileId, 
DS_VERSION_DRAFT, apiToken); + getFileCitationPostV1Draft.prettyPrint(); + getFileCitationPostV1Draft.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", equalTo("Finch, Fiona, " + currentYear + ", \"Darwin's Finches\", " + pidAsUrl + ", Root, DRAFT VERSION; foo.png [fileName]")); + + Response getFileCitationV1OldFilename = UtilIT.getFileCitation(fileId, "1.0", apiToken); + getFileCitationV1OldFilename.prettyPrint(); + getFileCitationV1OldFilename.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", equalTo("Finch, Fiona, " + currentYear + ", \"Darwin's Finches\", " + pidAsUrl + ", Root, V1; coffeeshop.png [fileName]")); + + UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken) + .then().assertThat().statusCode(OK.getStatusCode()); + + Response deaccessionDataset = UtilIT.deaccessionDataset(datasetId, "1.0", "just because", "http://example.com", apiToken); + deaccessionDataset.prettyPrint(); + deaccessionDataset.then().assertThat().statusCode(OK.getStatusCode()); + + Response getFileCitationV1PostDeaccessionAuthorDefault = UtilIT.getFileCitation(fileId, "1.0", apiToken); + getFileCitationV1PostDeaccessionAuthorDefault.prettyPrint(); + getFileCitationV1PostDeaccessionAuthorDefault.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()); + + Response getFileCitationV1PostDeaccessionAuthorIncludeDeaccessioned = UtilIT.getFileCitation(fileId, "1.0", true, apiToken); + getFileCitationV1PostDeaccessionAuthorIncludeDeaccessioned.prettyPrint(); + getFileCitationV1PostDeaccessionAuthorIncludeDeaccessioned.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", equalTo("Finch, Fiona, " + currentYear + ", \"Darwin's Finches\", " + pidAsUrl + ", Root, V1, DEACCESSIONED VERSION; coffeeshop.png [fileName]")); + + Response getFileCitationV1PostDeaccessionNoApiToken = UtilIT.getFileCitation(fileId, "1.0", null); + getFileCitationV1PostDeaccessionNoApiToken.prettyPrint(); + getFileCitationV1PostDeaccessionNoApiToken.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()) + .body("message", equalTo("Dataset version cannot be found or unauthorized.")); + + Response getFileCitationV1PostDeaccessionNoPermsUser = UtilIT.getFileCitation(fileId, "1.0", noPermsApiToken); + getFileCitationV1PostDeaccessionNoPermsUser.prettyPrint(); + getFileCitationV1PostDeaccessionNoPermsUser.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()) + .body("message", equalTo("Dataset version cannot be found or unauthorized.")); + + } + } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java index d5388e510d2..340eab161bb 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java @@ -1,23 +1,24 @@ package edu.harvard.iq.dataverse.api; +import java.util.ArrayList; +import java.util.List; import java.util.logging.Logger; - -import org.junit.jupiter.api.Test; +import java.util.stream.Collectors; import io.restassured.RestAssured; import static io.restassured.RestAssured.given; import io.restassured.path.json.JsonPath; import io.restassured.response.Response; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.UNAUTHORIZED; import static 
jakarta.ws.rs.core.Response.Status.ACCEPTED; import static jakarta.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; -import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.*; -import static org.junit.jupiter.api.Assertions.assertTrue; -import org.junit.jupiter.api.BeforeAll; /** * This class tests Harvesting Client functionality. @@ -37,11 +38,13 @@ public class HarvestingClientsIT { private static final String ARCHIVE_URL = "https://demo.dataverse.org"; private static final String HARVEST_METADATA_FORMAT = "oai_dc"; private static final String ARCHIVE_DESCRIPTION = "RestAssured harvesting client test"; - private static final String CONTROL_OAI_SET = "controlTestSet"; - private static final int DATASETS_IN_CONTROL_SET = 7; + private static final String CONTROL_OAI_SET = "controlTestSet2"; + private static final int DATASETS_IN_CONTROL_SET = 8; private static String normalUserAPIKey; private static String adminUserAPIKey; - private static String harvestCollectionAlias; + private static String harvestCollectionAlias; + String clientApiPath = null; + List globalIdList = new ArrayList(); @BeforeAll public static void setUpClass() { @@ -54,6 +57,27 @@ public static void setUpClass() { setupCollection(); } + @AfterEach + public void cleanup() throws InterruptedException { + if (clientApiPath != null) { + Response deleteResponse = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) + .delete(clientApiPath); + clientApiPath = null; + System.out.println("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode()); + + int i = 0; + int maxWait = 20; + String query = "dsPersistentId:" + globalIdList.stream().map(s -> "\""+s+"\"").collect(Collectors.joining(",")); + do { + if (UtilIT.search(query, normalUserAPIKey).prettyPrint().contains("count_in_response\": 0")) { + break; + } + Thread.sleep(1000L); + } while (i++ < maxWait); + } + globalIdList.clear(); + } private static void setupUsers() { Response cu0 = UtilIT.createRandomUser(); @@ -157,9 +181,19 @@ public void testCreateEditDeleteClient() throws InterruptedException { logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode()); assertEquals(OK.getStatusCode(), rDelete.getStatusCode()); } - + + @Test + public void testHarvestingClientRun_AllowHarvestingMissingCVV_False() throws InterruptedException { + harvestingClientRun(false); + } @Test - public void testHarvestingClientRun() throws InterruptedException { + public void testHarvestingClientRun_AllowHarvestingMissingCVV_True() throws InterruptedException { + harvestingClientRun(true); + } + + private void harvestingClientRun(boolean allowHarvestingMissingCVV) throws InterruptedException { + int expectedNumberOfSetsHarvested = allowHarvestingMissingCVV ? DATASETS_IN_CONTROL_SET : DATASETS_IN_CONTROL_SET - 1; + // This test will create a client and attempt to perform an actual // harvest and validate the resulting harvested content. 
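The harvest-run tests that follow (like the sleepForLock and sleepForSearch helpers used elsewhere in this diff) share a common wait pattern: sleep briefly, poll a status endpoint, and give up after a bounded number of tries. A minimal sketch of that poll-with-deadline loop; the condition and timings here are illustrative only, not the Dataverse API:

```java
import java.util.function.Supplier;

public class PollUntilSketch {

    /**
     * Polls a condition once per interval until it holds or the try budget is
     * exhausted. Returns true if the condition became true in time. The sleep
     * comes first, mirroring the tests' initial delay that lets server-side
     * state settle before the first check.
     */
    static boolean pollUntil(Supplier<Boolean> condition, int maxTries, long intervalMillis)
            throws InterruptedException {
        for (int i = 0; i < maxTries; i++) {
            Thread.sleep(intervalMillis);
            if (condition.get()) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) throws InterruptedException {
        long start = System.currentTimeMillis();
        // Toy condition standing in for "the harvesting client reports success":
        boolean done = pollUntil(() -> System.currentTimeMillis() - start > 3000, 20, 2000L);
        System.out.println("harvest finished in time: " + done);
    }
}
```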
@@ -170,14 +204,15 @@ public void testHarvestingClientRun() throws InterruptedException { String nickName = "h" + UtilIT.getRandomString(6); - String clientApiPath = String.format(HARVEST_CLIENTS_API+"%s", nickName); + clientApiPath = String.format(HARVEST_CLIENTS_API+"%s", nickName); String clientJson = String.format("{\"dataverseAlias\":\"%s\"," + "\"type\":\"oai\"," + "\"harvestUrl\":\"%s\"," + "\"archiveUrl\":\"%s\"," + "\"set\":\"%s\"," + + "\"allowHarvestingMissingCVV\":%s," + "\"metadataFormat\":\"%s\"}", - harvestCollectionAlias, HARVEST_URL, ARCHIVE_URL, CONTROL_OAI_SET, HARVEST_METADATA_FORMAT); + harvestCollectionAlias, HARVEST_URL, ARCHIVE_URL, CONTROL_OAI_SET, allowHarvestingMissingCVV, HARVEST_METADATA_FORMAT); Response createResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) @@ -205,11 +240,11 @@ public void testHarvestingClientRun() throws InterruptedException { int i = 0; int maxWait=20; // a very conservative interval; this harvest has no business taking this long do { - // Give it an initial 1 sec. delay, to make sure the client state + // Give it an initial 2 sec. delay, to make sure the client state // has been updated in the database, which can take some appreciable // amount of time on a heavily-loaded server running a full suite of // tests: - Thread.sleep(1000L); + Thread.sleep(2000L); // keep checking the status of the client with the GET api: Response getClientResponse = given() .get(clientApiPath); @@ -242,7 +277,7 @@ public void testHarvestingClientRun() throws InterruptedException { assertEquals(harvestTimeStamp, responseJsonPath.getString("data.lastNonEmpty")); // d) Confirm that the correct number of datasets have been harvested: - assertEquals(DATASETS_IN_CONTROL_SET, responseJsonPath.getInt("data.lastDatasetsHarvested")); + assertEquals(expectedNumberOfSetsHarvested, responseJsonPath.getInt("data.lastDatasetsHarvested")); // ok, it looks like the harvest has completed successfully. break; @@ -250,23 +285,22 @@ public void testHarvestingClientRun() throws InterruptedException { } while (i<maxWait); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java b/src/test/java/edu/harvard/iq/dataverse/api/HarvestingServerIT.java + private static Integer singleSetDatasetDatabaseId; private static List<String> extraDatasetsIdentifiers = new ArrayList<>(); @BeforeAll @@ -84,7 +86,7 @@ private static void setupDatasets() { // create dataset: Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey); createDatasetResponse.prettyPrint(); - Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + singleSetDatasetDatabaseId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); // retrieve the global id: singleSetDatasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); @@ -104,13 +106,13 @@ private static void setupDatasets() { // So wait for all of this to finish. UtilIT.sleepForReexport(singleSetDatasetPersistentId, adminUserAPIKey, 10); - // ... And let's create 4 more datasets for a multi-dataset experiment: + // ...
And let's create 5 more datasets for a multi-dataset experiment: - for (int i = 0; i < 4; i++) { + for (int i = 0; i < 5; i++) { // create dataset: createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, adminUserAPIKey); createDatasetResponse.prettyPrint(); - datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); // retrieve the global id: String thisDatasetPersistentId = UtilIT.getDatasetPersistentIdFromResponse(createDatasetResponse); @@ -288,7 +290,7 @@ public void testNativeSetAPI() { } @Test - public void testSetEditAPIandOAIlistSets() { + public void testSetEditAPIandOAIlistSets() throws InterruptedException { // This test focuses on testing the Edit functionality of the Dataverse // OAI Set API and the ListSets method of the Dataverse OAI server. @@ -299,7 +301,7 @@ public void testSetEditAPIandOAIlistSets() { // expected HTTP result codes. String setName = UtilIT.getRandomString(6); - String setDef = "*"; + String setDefinition = "title:Sample"; // Make sure the set does not exist String setPath = String.format("/api/harvest/server/oaisets/%s", setName); @@ -312,20 +314,21 @@ public void testSetEditAPIandOAIlistSets() { // Create the set as admin user Response createSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey) - .body(jsonForTestSpec(setName, setDef)) + .body(jsonForTestSpec(setName, setDefinition)) .post(createPath); assertEquals(201, createSetResponse.getStatusCode()); // I. Test the Modify/Edit (POST method) functionality of the // Dataverse OAI Sets API - String newDefinition = "title:New"; + String persistentId = extraDatasetsIdentifiers.get(0); + String newDefinition = "dsPersistentId:"+persistentId; String newDescription = "updated"; // API Test 1. Try to modify the set as normal user, should fail Response editSetResponse = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, normalUserAPIKey) - .body(jsonForEditSpec(setName, setDef, "")) + .body(jsonForEditSpec(setName, newDefinition, "")) .put(setPath); logger.info("non-admin user editSetResponse.getStatusCode(): " + editSetResponse.getStatusCode()); assertEquals(400, editSetResponse.getStatusCode()); @@ -369,16 +372,35 @@ public void testSetEditAPIandOAIlistSets() { XmlPath responseXmlPath = validateOaiVerbResponse(listSetsResponse, "ListSets"); - // 2. Validate the payload of the response, by confirming that the set + // 2. The set hasn't been exported yet, so it shouldn't be listed in + // ListSets (#3322). Let's confirm that: + + List listSets = responseXmlPath.getList("OAI-PMH.ListSets.set.list().findAll{it.setName=='"+setName+"'}", Node.class); + // 2a. Confirm that our set is listed: + assertNotNull(listSets, "Unexpected response from ListSets"); + assertEquals(0, listSets.size(), "An unexported OAI set is listed in ListSets"); + + // export the set: + + Response exportSetResponse = UtilIT.exportOaiSet(setName); + assertEquals(200, exportSetResponse.getStatusCode()); + Thread.sleep(1000L); // sleep for a sec to be sure + + // ... try again: + + listSetsResponse = UtilIT.getOaiListSets(); + responseXmlPath = validateOaiVerbResponse(listSetsResponse, "ListSets"); + + // 3. 
Validate the payload of the response, by confirming that the set // we created and modified, above, is being listed by the OAI server // and its xml record is properly formatted - List listSets = responseXmlPath.getList("OAI-PMH.ListSets.set.list().findAll{it.setName=='"+setName+"'}", Node.class); + listSets = responseXmlPath.getList("OAI-PMH.ListSets.set.list().findAll{it.setName=='"+setName+"'}", Node.class); - // 2a. Confirm that our set is listed: + // 3a. Confirm that our set is listed: assertNotNull(listSets, "Unexpected response from ListSets"); assertEquals(1, listSets.size(), "Newly-created set isn't properly listed by the OAI server"); - // 2b. Confirm that the set entry contains the updated description: + // 3b. Confirm that the set entry contains the updated description: assertEquals(newDescription, listSets.get(0).getPath("setDescription.metadata.element.field", String.class), "Incorrect description in the ListSets entry"); // ok, the xml record looks good! @@ -395,6 +417,11 @@ public void testSetEditAPIandOAIlistSets() { // OAI set with a single dataset, and attempt to retrieve // it and validate the OAI server responses of the corresponding // ListIdentifiers, ListRecords and GetRecord methods. + // Finally, we will make sure that the OAI record survives + // a reexport when the control dataset is dropped from the search + // index temporarily (if, for example, the site admin cleared their + // solr index in order to reindex everything from scratch - which + // can take a while on a large database). This is per #3437 @Test public void testSingleRecordOaiSet() throws InterruptedException { // Let's try and create an OAI set with the "single set dataset" that @@ -549,6 +576,83 @@ public void testSingleRecordOaiSet() throws InterruptedException { assertEquals("Medicine, Health and Life Sciences", responseXmlPath.getString("OAI-PMH.GetRecord.record.metadata.dc.subject")); // ok, looks legit! + + // Now, let's clear this dataset from Solr: + Response solrClearResponse = UtilIT.indexClearDataset(singleSetDatasetDatabaseId); + assertEquals(200, solrClearResponse.getStatusCode()); + solrClearResponse.prettyPrint(); + + // Now, let's re-export the set. The search query that defines the set + // will no longer find it (todo: confirm this first?). However, since + // the dataset still exists in the database, and would in real life + // be reindexed again, we don't want to mark the OAI record for the + // dataset as "deleted" just yet. (this is a new feature, as of 6.2) + // So, let's re-export the set... + + exportSetResponse = UtilIT.exportOaiSet(setName); + assertEquals(200, exportSetResponse.getStatusCode()); + Thread.sleep(1000L); // wait for just a second, to be safe + + // OAI Test 5. Check ListIdentifiers again: + + Response listIdentifiersResponse = UtilIT.getOaiListIdentifiers(setName, "oai_dc"); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + // ... and confirm that the record for our dataset is still listed + // as active: + List ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header"); + + assertEquals(1, ret.size()); + assertEquals(singleSetDatasetPersistentId, responseXmlPath + .getString("OAI-PMH.ListIdentifiers.header.identifier")); + assertEquals(setName, responseXmlPath + .getString("OAI-PMH.ListIdentifiers.header.setSpec")); + // ...
and, most importantly, make sure the record does not have a + // `status="deleted"` attribute: + assertNull(responseXmlPath.getString("OAI-PMH.ListIdentifiers.header.@status")); + + // While we are at it, let's now destroy this dataset for real, and + // make sure the "deleted" attribute is actually added once the set + // is re-exported: + + Response destroyDatasetResponse = UtilIT.destroyDataset(singleSetDatasetPersistentId, adminUserAPIKey); + assertEquals(200, destroyDatasetResponse.getStatusCode()); + destroyDatasetResponse.prettyPrint(); + + // Confirm that it no longer exists: + Response datasetNotFoundResponse = UtilIT.nativeGet(singleSetDatasetDatabaseId, adminUserAPIKey); + assertEquals(404, datasetNotFoundResponse.getStatusCode()); + + // Repeat the whole procedure, re-exporting the set and checking + // ListIdentifiers: + + exportSetResponse = UtilIT.exportOaiSet(setName); + assertEquals(200, exportSetResponse.getStatusCode()); + Thread.sleep(1000L); // wait for just a second, to be safe + System.out.println("re-exported the set again, with the control dataset destroyed"); + + // OAI Test 6. Check ListIdentifiers again: + + listIdentifiersResponse = UtilIT.getOaiListIdentifiers(setName, "oai_dc"); + assertEquals(OK.getStatusCode(), listIdentifiersResponse.getStatusCode()); + + // Validate the service section of the OAI response: + responseXmlPath = validateOaiVerbResponse(listIdentifiersResponse, "ListIdentifiers"); + + // ... and confirm that the record for our dataset is still listed... + ret = responseXmlPath.getList("OAI-PMH.ListIdentifiers.header"); + assertEquals(1, ret.size()); + assertEquals(singleSetDatasetPersistentId, responseXmlPath + .getString("OAI-PMH.ListIdentifiers.header.identifier")); + + // ...
BUT, it should be marked as "deleted" now: + assertEquals(responseXmlPath.getString("OAI-PMH.ListIdentifiers.header.@status"), "deleted"); } @@ -569,9 +673,13 @@ public void testMultiRecordOaiSet() throws InterruptedException { // in the class init: String setName = UtilIT.getRandomString(6); - String setQuery = "(dsPersistentId:" + singleSetDatasetIdentifier; + String setQuery = ""; for (String persistentId : extraDatasetsIdentifiers) { - setQuery = setQuery.concat(" OR dsPersistentId:" + persistentId); + if (setQuery.equals("")) { + setQuery = "(dsPersistentId:" + persistentId; + } else { + setQuery = setQuery.concat(" OR dsPersistentId:" + persistentId); + } } setQuery = setQuery.concat(")"); @@ -712,7 +820,6 @@ public void testMultiRecordOaiSet() throws InterruptedException { boolean allDatasetsListed = true; - allDatasetsListed = persistentIdsInListIdentifiers.contains(singleSetDatasetIdentifier); for (String persistentId : extraDatasetsIdentifiers) { allDatasetsListed = allDatasetsListed && persistentIdsInListIdentifiers.contains(persistentId); } @@ -837,12 +944,11 @@ public void testMultiRecordOaiSet() throws InterruptedException { // Record the last identifier listed on this final page: persistentIdsInListRecords.add(ret.get(0).substring(ret.get(0).lastIndexOf('/') + 1)); - // Finally, let's confirm that the expected 5 datasets have been listed + // Finally, let's confirm again that the expected 5 datasets have been listed // as part of this Set: allDatasetsListed = true; - allDatasetsListed = persistentIdsInListRecords.contains(singleSetDatasetIdentifier); for (String persistentId : extraDatasetsIdentifiers) { allDatasetsListed = allDatasetsListed && persistentIdsInListRecords.contains(persistentId); } @@ -860,11 +966,32 @@ public void testMultiRecordOaiSet() throws InterruptedException { logger.info("deleteResponse.getStatusCode(): " + deleteResponse.getStatusCode()); assertEquals(200, deleteResponse.getStatusCode(), "Failed to delete the control multi-record set"); } - + + @Test + public void testInvalidQueryParams() { + + // The query parameter "verb" must appear. + Response noVerbArg = given().get("/oai?foo=bar"); + noVerbArg.prettyPrint(); + noVerbArg.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("oai.error.@code", equalTo("badVerb")) + .body("oai.error", equalTo("No argument 'verb' found")); + + // The query parameter "verb" cannot appear more than once. + Response repeated = given().get( "/oai?verb=foo&verb=bar"); + repeated.prettyPrint(); + repeated.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("oai.error.@code", equalTo("badVerb")) + .body("oai.error", equalTo("Verb must be singular, given: '[foo, bar]'")); + + } + // TODO: // What else can we test? // Some ideas: - // - Test handling of deleted dataset records + // - Test handling of deleted dataset records - DONE! // - Test "from" and "until" time parameters // - Validate full verb response records against XML schema // (for each supported metadata format, possibly?) 
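The OAI server tests above all drive the same `/oai?verb=...` endpoint and then inspect the XML, including the `status="deleted"` marker on record headers. A minimal sketch of issuing such a ListIdentifiers request with the JDK's HTTP client; the base URL and set name are placeholders, and real code should parse the XML rather than string-match, but this shows the shape of the exchange the tests automate via UtilIT.getOaiListIdentifiers():

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class OaiListIdentifiersSketch {

    public static void main(String[] args) throws Exception {
        // Placeholder installation URL and set name:
        String baseUrl = "http://localhost:8080";
        String setName = "myset";
        URI uri = URI.create(baseUrl
                + "/oai?verb=ListIdentifiers&metadataPrefix=oai_dc&set=" + setName);

        HttpClient client = HttpClient.newHttpClient();
        HttpRequest request = HttpRequest.newBuilder(uri).GET().build();
        HttpResponse<String> response =
                client.send(request, HttpResponse.BodyHandlers.ofString());

        System.out.println("HTTP " + response.statusCode());

        // Crude check for the deletion marker the tests assert on:
        boolean markedDeleted = response.body().contains("status=\"deleted\"");
        System.out.println("record marked deleted: " + markedDeleted);
    }
}
```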
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/InfoIT.java b/src/test/java/edu/harvard/iq/dataverse/api/InfoIT.java index 3d5691dbe03..5e436dd0e98 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/InfoIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/InfoIT.java @@ -1,13 +1,12 @@ package edu.harvard.iq.dataverse.api; import static io.restassured.RestAssured.given; - import io.restassured.response.Response; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; - +import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; import static jakarta.ws.rs.core.Response.Status.NOT_FOUND; import static jakarta.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; @@ -82,6 +81,7 @@ public void testGetZipDownloadLimit() { .body("data", notNullValue()); } + private void testSettingEndpoint(SettingsServiceBean.Key settingKey, String testSettingValue) { String endpoint = "/api/info/settings/" + settingKey; // Setting not found diff --git a/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java b/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java index 907d3dec4bc..2f46960f9a8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java @@ -9,6 +9,8 @@ import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; import static jakarta.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; +import static org.junit.jupiter.api.Assertions.assertTrue; + import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -163,6 +165,8 @@ public void testDeepLinks() { .statusCode(OK.getStatusCode()) .body("data.message", equalTo("Dataverse " + level1a + " linked successfully to " + level1b)); + assertTrue(UtilIT.sleepForSearch("*", apiToken, "&subtree="+level1b, 1, UtilIT.GENERAL_LONG_DURATION), "Zero counts in level1b"); + Response searchLevel1toLevel1 = UtilIT.search("*", apiToken, "&subtree=" + level1b); searchLevel1toLevel1.prettyPrint(); searchLevel1toLevel1.then().assertThat() @@ -184,6 +188,8 @@ public void testDeepLinks() { .statusCode(OK.getStatusCode()) .body("data.message", equalTo("Dataverse " + level2a + " linked successfully to " + level2b)); + assertTrue(UtilIT.sleepForSearch("*", apiToken, "&subtree=" + level2b, 1, UtilIT.GENERAL_LONG_DURATION), "Never found linked dataverse: " + level2b); + Response searchLevel2toLevel2 = UtilIT.search("*", apiToken, "&subtree=" + level2b); searchLevel2toLevel2.prettyPrint(); searchLevel2toLevel2.then().assertThat() diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java index 7a113fd4caa..69bdd8ee515 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java @@ -1,5 +1,7 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.makedatacount.MakeDataCountProcessState; +import io.restassured.path.json.JsonPath; import io.restassured.RestAssured; import io.restassured.response.Response; import java.io.File; @@ -7,8 +9,13 @@ import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.OK; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; +import static jakarta.ws.rs.core.Response.Status.NOT_FOUND; import 
org.apache.commons.io.FileUtils; +import static org.hamcrest.CoreMatchers.anyOf; import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.MatcherAssert.assertThat; + +import org.hamcrest.Matchers; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -171,6 +178,60 @@ public void testMakeDataCountGetMetric() throws IOException { } + @Test + public void testGetUpdateDeleteProcessingState() { + String yearMonth = "2000-01"; + // make sure it isn't in the DB + Response deleteState = UtilIT.makeDataCountDeleteProcessingState(yearMonth); + deleteState.then().assertThat().statusCode(anyOf(equalTo(200), equalTo(404))); + + Response getState = UtilIT.makeDataCountGetProcessingState(yearMonth); + getState.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + Response updateState = UtilIT.makeDataCountUpdateProcessingState(yearMonth, MakeDataCountProcessState.MDCProcessState.PROCESSING.toString()); + updateState.then().assertThat().statusCode(OK.getStatusCode()); + getState = UtilIT.makeDataCountGetProcessingState(yearMonth); + getState.then().assertThat().statusCode(OK.getStatusCode()); + JsonPath stateJson = JsonPath.from(getState.body().asString()); + stateJson.prettyPrint(); + String state1 = stateJson.getString("data.state"); + assertThat(state1, Matchers.equalTo(MakeDataCountProcessState.MDCProcessState.PROCESSING.name())); + String updateTimestamp1 = stateJson.getString("data.stateChangeTimestamp"); + + updateState = UtilIT.makeDataCountUpdateProcessingState(yearMonth, MakeDataCountProcessState.MDCProcessState.DONE.toString()); + updateState.then().assertThat().statusCode(OK.getStatusCode()); + stateJson = JsonPath.from(updateState.body().asString()); + stateJson.prettyPrint(); + String state2 = stateJson.getString("data.state"); + String updateTimestamp2 = stateJson.getString("data.stateChangeTimestamp"); + assertThat(state2, Matchers.equalTo(MakeDataCountProcessState.MDCProcessState.DONE.name())); + + assertThat(updateTimestamp2, Matchers.is(Matchers.greaterThan(updateTimestamp1))); + + deleteState = UtilIT.makeDataCountDeleteProcessingState(yearMonth); + deleteState.then().assertThat().statusCode(OK.getStatusCode()); + } + + @Test + public void testUpdateProcessingStateWithInvalidState() { + String yearMonth = "2000-02"; + // make sure it isn't in the DB + Response deleteState = UtilIT.makeDataCountDeleteProcessingState(yearMonth); + deleteState.then().assertThat().statusCode(anyOf(equalTo(200), equalTo(404))); + + Response stateResponse = UtilIT.makeDataCountUpdateProcessingState(yearMonth, "InvalidState"); + stateResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + + stateResponse = UtilIT.makeDataCountUpdateProcessingState(yearMonth, "new"); + stateResponse.then().assertThat().statusCode(OK.getStatusCode()); + stateResponse = UtilIT.makeDataCountUpdateProcessingState(yearMonth, "InvalidState"); + stateResponse.then().assertThat().statusCode(BAD_REQUEST.getStatusCode()); + stateResponse = UtilIT.makeDataCountGetProcessingState(yearMonth); + stateResponse.then().assertThat().statusCode(OK.getStatusCode()); + JsonPath stateJson = JsonPath.from(stateResponse.body().asString()); + String state = stateJson.getString("data.state"); + assertThat(state, Matchers.equalTo(MakeDataCountProcessState.MDCProcessState.NEW.name())); + } + /** * Ignore is set on this test because it requires database edits to pass. 
* There are currently two citations for doi:10.7910/DVN/HQZOOB but you have diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java index c301e158b4e..5f5a7fbc0f8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java @@ -8,6 +8,8 @@ import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.OK; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.not; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assumptions.assumeFalse; import static org.junit.jupiter.api.Assumptions.assumeTrue; @@ -20,12 +22,50 @@ public static void setUpClass() { } @Test - void testGetCitationBlock() { + void testListMetadataBlocks() { + // No optional params enabled + Response listMetadataBlocksResponse = UtilIT.listMetadataBlocks(false, false); + int expectedDefaultNumberOfMetadataBlocks = 6; + listMetadataBlocksResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].fields", equalTo(null)) + .body("data.size()", equalTo(expectedDefaultNumberOfMetadataBlocks)); + + // onlyDisplayedOnCreate=true + listMetadataBlocksResponse = UtilIT.listMetadataBlocks(true, false); + int expectedOnlyDisplayedOnCreateNumberOfMetadataBlocks = 1; + listMetadataBlocksResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].fields", equalTo(null)) + .body("data[0].displayName", equalTo("Citation Metadata")) + .body("data.size()", equalTo(expectedOnlyDisplayedOnCreateNumberOfMetadataBlocks)); + + // returnDatasetFieldTypes=true + listMetadataBlocksResponse = UtilIT.listMetadataBlocks(false, true); + listMetadataBlocksResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].fields", not(equalTo(null))) + .body("data.size()", equalTo(expectedDefaultNumberOfMetadataBlocks)); + + // onlyDisplayedOnCreate=true and returnDatasetFieldTypes=true + listMetadataBlocksResponse = UtilIT.listMetadataBlocks(true, true); + listMetadataBlocksResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data[0].fields", not(equalTo(null))) + .body("data[0].displayName", equalTo("Citation Metadata")) + .body("data.size()", equalTo(expectedOnlyDisplayedOnCreateNumberOfMetadataBlocks)); + } + + @Test + void testGetMetadataBlock() { Response getCitationBlock = UtilIT.getMetadataBlock("citation"); getCitationBlock.prettyPrint(); getCitationBlock.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data.fields.subject.controlledVocabularyValues[0]", CoreMatchers.is("Agricultural Sciences")); + .body("data.fields.subject.controlledVocabularyValues[0]", CoreMatchers.is("Agricultural Sciences")) + .body("data.fields.title.displayOrder", CoreMatchers.is(0)) + .body("data.fields.title.typeClass", CoreMatchers.is("primitive")) + .body("data.fields.title.isRequired", CoreMatchers.is(true)); } @Test @@ -37,18 +77,18 @@ void testDatasetWithAllDefaultMetadata() { ", response=" + createUser.prettyPrint()); String apiToken = UtilIT.getApiTokenFromResponse(createUser); assumeFalse(apiToken == null || apiToken.isBlank()); - + Response createCollection = UtilIT.createRandomDataverse(apiToken); assumeTrue(createCollection.statusCode() < 300, "code=" + createCollection.statusCode() + ", response=" + createCollection.prettyPrint()); String dataverseAlias =
UtilIT.getAliasFromResponse(createCollection); assumeFalse(dataverseAlias == null || dataverseAlias.isBlank()); - + // when String pathToJsonFile = "scripts/api/data/dataset-create-new-all-default-fields.json"; Response createDataset = UtilIT.createDatasetViaNativeApi(dataverseAlias, pathToJsonFile, apiToken); - + // then assertEquals(CREATED.getStatusCode(), createDataset.statusCode(), "code=" + createDataset.statusCode() + diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java index e3328eefb4a..a8f7afc1cb0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetricsIT.java @@ -1,16 +1,18 @@ package edu.harvard.iq.dataverse.api; -import io.restassured.RestAssured; -import io.restassured.response.Response; -import edu.harvard.iq.dataverse.metrics.MetricsUtil; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; import static jakarta.ws.rs.core.Response.Status.OK; -import org.junit.jupiter.api.AfterAll; +import static org.junit.jupiter.api.Assertions.assertEquals; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import static org.junit.jupiter.api.Assertions.assertEquals; +import edu.harvard.iq.dataverse.metrics.MetricsUtil; +import edu.harvard.iq.dataverse.util.FileUtil; +import io.restassured.RestAssured; +import io.restassured.response.Response; +import jakarta.ws.rs.core.MediaType; //TODO: These tests are fairly flawed as they don't actually add data to compare on. //To improve these tests we should try adding data and see if the number DOESN'T @@ -120,6 +122,54 @@ public void testGetDownloadsToMonth() { response.then().assertThat() .statusCode(BAD_REQUEST.getStatusCode()); } + + @Test + public void testGetAccountsToMonth() { + String thismonth = MetricsUtil.getCurrentMonth(); + + Response response = UtilIT.metricsAccountsToMonth(thismonth, null); + String precache = response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + + //Run each query twice and compare results to test caching + response = UtilIT.metricsAccountsToMonth(thismonth, null); + String postcache = response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + + assertEquals(precache, postcache); + + //Test error when passing extra query params + response = UtilIT.metricsAccountsToMonth(thismonth, "dataLocation=local"); + response.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()); + } + + @Test + public void testGetAccountsTimeSeries() { + Response response = UtilIT.metricsAccountsTimeSeries(MediaType.APPLICATION_JSON, null); + String precache = response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + + //Run each query twice and compare results to test caching + response = UtilIT.metricsAccountsTimeSeries(MediaType.APPLICATION_JSON, null); + String postcache = response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + + assertEquals(precache, postcache); + + response = UtilIT.metricsAccountsTimeSeries(FileUtil.MIME_TYPE_CSV, null); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + + //Test error when passing extra query params + response = UtilIT.metricsAccountsTimeSeries(MediaType.APPLICATION_JSON, "dataLocation=local"); + response.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()); + } @Test @@ -214,6 +264,29 @@ public void
testGetDownloadsPastDays() { response.then().assertThat() .statusCode(BAD_REQUEST.getStatusCode()); } + + @Test + public void testGetAccountsPastDays() { + String days = "30"; + + Response response = UtilIT.metricsAccountsPastDays(days, null); + String precache = response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + + //Run each query twice and compare results to test caching + response = UtilIT.metricsAccountsPastDays(days, null); + String postcache = response.prettyPrint(); + response.then().assertThat() + .statusCode(OK.getStatusCode()); + + assertEquals(precache, postcache); + + //Test error when passing extra query params + response = UtilIT.metricsAccountsPastDays(days, "dataLocation=local"); + response.then().assertThat() + .statusCode(BAD_REQUEST.getStatusCode()); + } @Test diff --git a/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java index d4dba236051..0455f5e35a4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/NetcdfIT.java @@ -220,8 +220,8 @@ public void testExtraBoundingBoxFromNetcdf() throws IOException { .statusCode(OK.getStatusCode()) .body("data.latestVersion.metadataBlocks.geospatial.fields[0].value[0].westLongitude.value", equalTo("-16.320007")) .body("data.latestVersion.metadataBlocks.geospatial.fields[0].value[0].eastLongitude.value", equalTo("-6.220001")) - .body("data.latestVersion.metadataBlocks.geospatial.fields[0].value[0].northLongitude.value", equalTo("49.62")) - .body("data.latestVersion.metadataBlocks.geospatial.fields[0].value[0].southLongitude.value", equalTo("41.8")); + .body("data.latestVersion.metadataBlocks.geospatial.fields[0].value[0].northLatitude.value", equalTo("49.62")) + .body("data.latestVersion.metadataBlocks.geospatial.fields[0].value[0].southLatitude.value", equalTo("41.8")); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/OpenApiIT.java b/src/test/java/edu/harvard/iq/dataverse/api/OpenApiIT.java new file mode 100644 index 00000000000..eb98bdcda8e --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/OpenApiIT.java @@ -0,0 +1,40 @@ +package edu.harvard.iq.dataverse.api; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import io.restassured.RestAssured; +import io.restassured.response.Response; + +public class OpenApiIT { + + @BeforeAll + public static void setUpClass() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + } + + @Test + public void testOpenApi(){ + + Response openApi = UtilIT.getOpenAPI("application/json", "json"); + openApi.prettyPrint(); + openApi.then().assertThat() + .statusCode(200); + + openApi = UtilIT.getOpenAPI("", "json"); + openApi.prettyPrint(); + openApi.then().assertThat() + .statusCode(200); + + openApi = UtilIT.getOpenAPI("", "yaml"); + openApi.prettyPrint(); + openApi.then().assertThat() + .statusCode(200); + + openApi = UtilIT.getOpenAPI("application/json", "yaml"); + openApi.prettyPrint(); + openApi.then().assertThat() + .statusCode(400); + + + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java index 9b3b66538d7..33323ff4239 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ProvIT.java @@ -1,32 +1,42 @@ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import
io.restassured.RestAssured; import io.restassured.path.json.JsonPath; import io.restassured.response.Response; import jakarta.json.Json; import jakarta.json.JsonArray; import jakarta.json.JsonObject; -import static jakarta.ws.rs.core.Response.Status.CREATED; -import static jakarta.ws.rs.core.Response.Status.OK; import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST; +import static jakarta.ws.rs.core.Response.Status.CREATED; import static jakarta.ws.rs.core.Response.Status.FORBIDDEN; -import static org.junit.jupiter.api.Assertions.assertEquals; +import static jakarta.ws.rs.core.Response.Status.OK; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.notNullValue; - +import org.junit.jupiter.api.AfterAll; +import static org.junit.jupiter.api.Assertions.assertEquals; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; public class ProvIT { + + private static boolean provEnabled = false; @BeforeAll public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + Response provCollectionStatus = UtilIT.getSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + + provEnabled = provCollectionStatus.getStatusCode() == 200; + if (!provEnabled) { + UtilIT.enableSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } } @Test public void testFreeformDraftActions() { + Response createDepositor = UtilIT.createRandomUser(); createDepositor.prettyPrint(); createDepositor.then().assertThat() @@ -71,6 +81,7 @@ public void testFreeformDraftActions() { JsonObject provFreeFormGood = Json.createObjectBuilder() .add("text", "I inherited this file from my grandfather.") .build(); + Response uploadProvFreeForm = UtilIT.uploadProvFreeForm(dataFileId.toString(), provFreeFormGood, apiTokenForDepositor); uploadProvFreeForm.prettyPrint(); uploadProvFreeForm.then().assertThat() @@ -80,8 +91,7 @@ public void testFreeformDraftActions() { datasetVersions.prettyPrint(); datasetVersions.then().assertThat() .body("data[0].versionState", equalTo("DRAFT")); - - + } @Test @@ -196,6 +206,7 @@ public void testAddProvFile() { .body("data.json", notNullValue(String.class)); assertEquals(200, getProvJson.getStatusCode()); + // TODO: Test that if provenance already exists in CPL (e.g. cplId in fileMetadata is not 0) upload returns error. // There are currently no api endpoints to set up this test.
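The ProvIT changes above illustrate a reusable fixture pattern: record whether a setting was already enabled, enable it only if needed, and restore the original state after the class runs, so a local configuration survives the test suite. A minimal JUnit 5 sketch of that pattern; isEnabled/enable/disable are placeholders for whatever settings client a suite provides (here, UtilIT.getSetting, enableSetting, and deleteSetting with a SettingsServiceBean key):

```java
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

public class SettingToggleFixtureSketch {

    private static boolean wasAlreadyEnabled = false;

    @BeforeAll
    public static void enableIfNeeded() {
        // Remember the pre-test state so teardown only undoes our own change.
        wasAlreadyEnabled = isEnabled();
        if (!wasAlreadyEnabled) {
            enable();
        }
    }

    @AfterAll
    public static void restoreOriginalState() {
        // Restore only if this class was the one that turned the setting on.
        if (!wasAlreadyEnabled) {
            disable();
        }
    }

    @Test
    public void featureDependentTest() {
        // ... exercise the feature that requires the setting ...
    }

    // Placeholders standing in for a real settings API:
    private static boolean isEnabled() { return false; }
    private static void enable() {}
    private static void disable() {}
}
```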
@@ -204,11 +215,13 @@ public void testAddProvFile() { deleteProvJson.then().assertThat() .statusCode(FORBIDDEN.getStatusCode()); //cannot delete json of a published dataset -// Command removed, redundant -// Response deleteProvFreeForm = UtilIT.deleteProvFreeForm(dataFileId.toString(), apiTokenForDepositor); -// deleteProvFreeForm.prettyPrint(); -// deleteProvFreeForm.then().assertThat() -// .statusCode(OK.getStatusCode()); } + + @AfterAll + public static void tearDownClass() { + if(!provEnabled){ + UtilIT.deleteSetting(SettingsServiceBean.Key.ProvCollectionEnabled); + } + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/RolesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/RolesIT.java index 8b5ac917dea..d15fda3a1a1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/RolesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/RolesIT.java @@ -18,7 +18,7 @@ */ public class RolesIT { - private static final Logger logger = Logger.getLogger(AdminIT.class.getCanonicalName()); + private static final Logger logger = Logger.getLogger(RolesIT.class.getCanonicalName()); @BeforeAll public static void setUp() { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessDirectIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessDirectIT.java new file mode 100644 index 00000000000..1e44d952af7 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessDirectIT.java @@ -0,0 +1,97 @@ +package edu.harvard.iq.dataverse.api; + +import io.restassured.RestAssured; +import static io.restassured.RestAssured.given; +import io.restassured.path.json.JsonPath; +import io.restassured.response.Response; +import io.restassured.specification.RequestSpecification; +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; +import org.apache.commons.lang3.math.NumberUtils; +import org.junit.jupiter.api.Test; + +public class S3AccessDirectIT { + + @Test + public void testS3DirectUpload() { + // TODO: remove all these constants + RestAssured.baseURI = "https://demo.dataverse.org"; + String apiToken = ""; + String datasetPid = "doi:10.70122/FK2/UBWSJU"; + String datasetId = "2106131"; + long size = 1000000000l; + + Response getUploadUrls = getUploadUrls(datasetPid, size, apiToken); + getUploadUrls.prettyPrint(); + getUploadUrls.then().assertThat().statusCode(200); + + String url = JsonPath.from(getUploadUrls.asString()).getString("data.url"); + String partSize = JsonPath.from(getUploadUrls.asString()).getString("data.partSize"); + String storageIdentifier = JsonPath.from(getUploadUrls.asString()).getString("data.storageIdentifier"); + System.out.println("url: " + url); + System.out.println("partSize: " + partSize); + System.out.println("storageIdentifier: " + storageIdentifier); + + System.out.println("uploading file via direct upload"); + String decodedUrl = null; + try { + decodedUrl = URLDecoder.decode(url, StandardCharsets.UTF_8.name()); + } catch (UnsupportedEncodingException ex) { + } + + InputStream inputStream = new ByteArrayInputStream("bumble".getBytes(StandardCharsets.UTF_8)); + Response uploadFileDirect = uploadFileDirect(decodedUrl, inputStream); + uploadFileDirect.prettyPrint(); + uploadFileDirect.then().assertThat().statusCode(200); + + // TODO: Use MD5 or whatever Dataverse is configured for and + // actually calculate it. 
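That TODO is actionable: rather than registering the placeholder checksum "123456" shown in the JSON that follows, the test could hash the same bytes it uploads. A minimal sketch, assuming SHA-1 (the algorithm named in the registration JSON) and using only the JDK:

```java
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;

public class Sha1OfUploadSketch {

    public static void main(String[] args) throws Exception {
        // The same bytes the test PUTs to the presigned upload URL.
        byte[] uploadedBytes = "bumble".getBytes(StandardCharsets.UTF_8);

        MessageDigest sha1 = MessageDigest.getInstance("SHA-1");
        StringBuilder hex = new StringBuilder();
        for (byte b : sha1.digest(uploadedBytes)) {
            hex.append(String.format("%02x", b));
        }

        // This value is what belongs in checksum["@value"] instead of "123456".
        System.out.println("SHA-1: " + hex);
    }
}
```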
+ String jsonData = """ +{ + "description": "My description.", + "directoryLabel": "data/subdir1", + "categories": [ + "Data" + ], + "restrict": "false", + "storageIdentifier": "%s", + "fileName": "file1.txt", + "mimeType": "text/plain", + "checksum": { + "@type": "SHA-1", + "@value": "123456" + } +} +""".formatted(storageIdentifier); + Response addRemoteFile = UtilIT.addRemoteFile(datasetId, jsonData, apiToken); + addRemoteFile.prettyPrint(); + addRemoteFile.then().assertThat() + .statusCode(200); + } + + static Response getUploadUrls(String idOrPersistentIdOfDataset, long sizeInBytes, String apiToken) { + String idInPath = idOrPersistentIdOfDataset; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. + if (!NumberUtils.isCreatable(idOrPersistentIdOfDataset)) { + idInPath = ":persistentId"; + optionalQueryParam = "&persistentId=" + idOrPersistentIdOfDataset; + } + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("/api/datasets/" + idInPath + "/uploadurls?size=" + sizeInBytes + optionalQueryParam); + } + + static Response uploadFileDirect(String url, InputStream inputStream) { + return given() + .header("x-amz-tagging", "dv-state=temp") + .body(inputStream) + .put(url); + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 8b1e96f3622..74150ca120a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -1,66 +1,396 @@ package edu.harvard.iq.dataverse.api; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration; +import com.amazonaws.regions.Regions; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.model.AmazonS3Exception; +import com.amazonaws.services.s3.model.Bucket; +import com.amazonaws.services.s3.model.HeadBucketRequest; import io.restassured.RestAssured; +import static io.restassured.RestAssured.given; +import io.restassured.http.Header; import io.restassured.path.json.JsonPath; import io.restassured.response.Response; +import io.restassured.specification.RequestSpecification; +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; import java.util.logging.Logger; - +import org.apache.commons.lang3.math.NumberUtils; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.Matchers.startsWith; +import org.junit.jupiter.api.Assertions; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import static org.hamcrest.Matchers.startsWith; - /** - * NOTE: This test WILL NOT pass if your installation is not configured for Amazon S3 storage. 
- * For S3 storage, you must set two jvm options: storage-driver-id and s3-bucket-name - * Refer to the guides or to https://github.com/IQSS/dataverse/issues/3921#issuecomment-319973245 - * @author bsilverstein + * This test requires LocalStack and Minio to be running. Developers can use our + * docker-compose file, which has all the necessary configuration. */ public class S3AccessIT { - + private static final Logger logger = Logger.getLogger(S3AccessIT.class.getCanonicalName()); + static final String BUCKET_NAME = "mybucket"; + static AmazonS3 s3localstack = null; + static AmazonS3 s3minio = null; + @BeforeAll public static void setUp() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); - + + // At least when spun up by our docker-compose file, the creds don't matter for LocalStack. + String accessKeyLocalStack = "whatever"; + String secretKeyLocalStack = "not used"; + + s3localstack = AmazonS3ClientBuilder.standard() + .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyLocalStack, secretKeyLocalStack))) + .withEndpointConfiguration(new EndpointConfiguration("s3.localhost.localstack.cloud:4566", Regions.US_EAST_2.getName())).build(); + + String accessKeyMinio = "4cc355_k3y"; + String secretKeyMinio = "s3cr3t_4cc355_k3y"; + s3minio = AmazonS3ClientBuilder.standard() + // https://stackoverflow.com/questions/72205086/amazonss3client-throws-unknownhostexception-if-attempting-to-connect-to-a-local + .withPathStyleAccessEnabled(Boolean.TRUE) + .withCredentials(new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKeyMinio, secretKeyMinio))) + .withEndpointConfiguration(new EndpointConfiguration("http://localhost:9000", Regions.US_EAST_1.getName())).build(); + +// System.out.println("buckets on LocalStack before attempting to create " + BUCKET_NAME); +// for (Bucket bucket : s3localstack.listBuckets()) { +// System.out.println("bucket: " + bucket); +// } +// +// System.out.println("buckets on MinIO before attempting to create " + BUCKET_NAME); +// for (Bucket bucket : s3minio.listBuckets()) { +// System.out.println("bucket: " + bucket); +// } + // create bucket if it doesn't exist + // Note that we create the localstack bucket with conf/localstack/buckets.sh + // because we haven't figured out how to create it properly in Java. + // Perhaps it is missing ACLs. + try { + s3localstack.headBucket(new HeadBucketRequest(BUCKET_NAME)); + } catch (AmazonS3Exception ex) { + s3localstack.createBucket(BUCKET_NAME); + } + + try { + s3minio.headBucket(new HeadBucketRequest(BUCKET_NAME)); + } catch (AmazonS3Exception ex) { + s3minio.createBucket(BUCKET_NAME); + } + } - + + /** + * We're using MinIO for testing non-direct upload. + */ @Test - public void testAddDataFileS3Prefix() { + public void testNonDirectUpload() { + String driverId = "minio1"; + String driverLabel = "MinIO"; + + Response createSuperuser = UtilIT.createRandomUser(); + createSuperuser.then().assertThat().statusCode(200); + String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); + String superusername = UtilIT.getUsernameFromResponse(createSuperuser); + UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); + Response storageDrivers = UtilIT.listStorageDrivers(superuserApiToken); + storageDrivers.prettyPrint(); + // TODO where is "Local/local" coming from?
+ String drivers = """ +{ + "status": "OK", + "data": { + "LocalStack": "localstack1", + "MinIO": "minio1", + "Local": "local", + "Filesystem": "file1" + } +}"""; + //create user who will make a dataverse/dataset Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(200); String username = UtilIT.getUsernameFromResponse(createUser); String apiToken = UtilIT.getApiTokenFromResponse(createUser); - + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); - + + Response originalStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); + originalStorageDriver.prettyPrint(); + originalStorageDriver.then().assertThat() + .body("data.message", equalTo("undefined")) + .statusCode(200); + + Response setStorageDriverToS3 = UtilIT.setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); + setStorageDriverToS3.prettyPrint(); + setStorageDriverToS3.then().assertThat() + .statusCode(200); + + Response updatedStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); + updatedStorageDriver.prettyPrint(); + updatedStorageDriver.then().assertThat() + .statusCode(200); + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); - Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); createDatasetResponse.prettyPrint(); - - //upload a tabular file via native, check storage id prefix for s3 + createDatasetResponse.then().assertThat().statusCode(201); + Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + String datasetPid = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId"); + String datasetStorageIdentifier = datasetPid.substring(4); + + Response getDatasetMetadata = UtilIT.nativeGet(datasetId, apiToken); + getDatasetMetadata.prettyPrint(); + getDatasetMetadata.then().assertThat().statusCode(200); + + //upload a tabular file via native, check storage id prefix for driverId String pathToFile = "scripts/search/data/tabular/1char"; Response addFileResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); addFileResponse.prettyPrint(); addFileResponse.then().assertThat() - .body("data.files[0].dataFile.storageIdentifier", startsWith("s3://")); - - //clean up test dvobjects and user - Response deleteDataset = UtilIT.deleteDatasetViaNativeApi(datasetId, apiToken); - deleteDataset.prettyPrint(); - deleteDataset.then().assertThat() + .statusCode(200) + .body("data.files[0].dataFile.storageIdentifier", startsWith(driverId + "://")); + + String fileId = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.id"); + + Response getfileMetadata = UtilIT.getFileData(fileId, apiToken); + getfileMetadata.prettyPrint(); + getfileMetadata.then().assertThat().statusCode(200); + + String storageIdentifier = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.storageIdentifier"); + String keyInDataverse = storageIdentifier.split(":")[2]; + Assertions.assertEquals(driverId + "://" + BUCKET_NAME + ":" + keyInDataverse, storageIdentifier); + + String keyInS3 = datasetStorageIdentifier + "/" + keyInDataverse; + String s3Object = s3minio.getObjectAsString(BUCKET_NAME, keyInS3); + System.out.println("s3Object: " + s3Object); + + // The file uploaded above only contains the 
character "a". + assertEquals("a".trim(), s3Object.trim()); + + System.out.println("non-direct download..."); + Response downloadFile = UtilIT.downloadFile(Integer.valueOf(fileId), apiToken); + downloadFile.then().assertThat().statusCode(200); + + String contentsOfDownloadedFile = downloadFile.getBody().asString(); + assertEquals("a\n", contentsOfDownloadedFile); + + Response deleteFile = UtilIT.deleteFileApi(Integer.parseInt(fileId), apiToken); + deleteFile.prettyPrint(); + deleteFile.then().assertThat().statusCode(200); + + AmazonS3Exception expectedException = null; + try { + s3minio.getObjectAsString(BUCKET_NAME, keyInS3); + } catch (AmazonS3Exception ex) { + expectedException = ex; + } + assertNotNull(expectedException); + // 404 because the file has been sucessfully deleted + assertEquals(404, expectedException.getStatusCode()); + + } + + /** + * We use LocalStack to test direct upload. + */ + @Test + public void testDirectUpload() { + String driverId = "localstack1"; + String driverLabel = "LocalStack"; + Response createSuperuser = UtilIT.createRandomUser(); + createSuperuser.then().assertThat().statusCode(200); + String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser); + String superusername = UtilIT.getUsernameFromResponse(createSuperuser); + UtilIT.makeSuperUser(superusername).then().assertThat().statusCode(200); + Response storageDrivers = UtilIT.listStorageDrivers(superuserApiToken); + storageDrivers.prettyPrint(); + // TODO where is "Local/local" coming from? + String drivers = """ +{ + "status": "OK", + "data": { + "LocalStack": "localstack1", + "MinIO": "minio1", + "Local": "local", + "Filesystem": "file1" + } +}"""; + + //create user who will make a dataverse/dataset + Response createUser = UtilIT.createRandomUser(); + createUser.then().assertThat().statusCode(200); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response originalStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); + originalStorageDriver.prettyPrint(); + originalStorageDriver.then().assertThat() + .body("data.message", equalTo("undefined")) .statusCode(200); - Response deleteDataverse = UtilIT.deleteDataverse(dataverseAlias, apiToken); - deleteDataverse.prettyPrint(); - deleteDataverse.then().assertThat() + Response setStorageDriverToS3 = UtilIT.setStorageDriver(dataverseAlias, driverLabel, superuserApiToken); + setStorageDriverToS3.prettyPrint(); + setStorageDriverToS3.then().assertThat() .statusCode(200); - - Response deleteUser = UtilIT.deleteUser(username); - deleteUser.prettyPrint(); - deleteUser.then().assertThat() + + Response updatedStorageDriver = UtilIT.getStorageDriver(dataverseAlias, superuserApiToken); + updatedStorageDriver.prettyPrint(); + updatedStorageDriver.then().assertThat() .statusCode(200); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.prettyPrint(); + createDatasetResponse.then().assertThat().statusCode(201); + Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id"); + String datasetPid = JsonPath.from(createDatasetResponse.body().asString()).getString("data.persistentId"); + String datasetStorageIdentifier = datasetPid.substring(4); + + Response 
+        Response getDatasetMetadata = UtilIT.nativeGet(datasetId, apiToken);
+        getDatasetMetadata.prettyPrint();
+        getDatasetMetadata.then().assertThat().statusCode(200);
+
+//        //upload a tabular file via native, check storage id prefix for driverId
+//        String pathToFile = "scripts/search/data/tabular/1char";
+//        Response addFileResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken);
+//        addFileResponse.prettyPrint();
+//        addFileResponse.then().assertThat()
+//                .statusCode(200)
+//                .body("data.files[0].dataFile.storageIdentifier", startsWith(driverId + "://"));
+//
+//        String fileId = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.id");
+        long size = 1000000000L;
+        Response getUploadUrls = UtilIT.getUploadUrls(datasetPid, size, apiToken);
+        getUploadUrls.prettyPrint();
+        getUploadUrls.then().assertThat().statusCode(200);
+
+        String url = JsonPath.from(getUploadUrls.asString()).getString("data.url");
+        String partSize = JsonPath.from(getUploadUrls.asString()).getString("data.partSize");
+        String storageIdentifier = JsonPath.from(getUploadUrls.asString()).getString("data.storageIdentifier");
+        System.out.println("url: " + url);
+        System.out.println("partSize: " + partSize);
+        System.out.println("storageIdentifier: " + storageIdentifier);
+
+        System.out.println("uploading file via direct upload");
+        String decodedUrl = null;
+        try {
+            decodedUrl = URLDecoder.decode(url, StandardCharsets.UTF_8.name());
+        } catch (UnsupportedEncodingException ex) {
+            // cannot happen: UTF-8 is always supported
+        }
+
+        // change to localhost because LocalStack is running in a container locally
+        String localhostUrl = decodedUrl.replace("http://localstack", "http://localhost");
+        String contentsOfFile = "foobar";
+
+        InputStream inputStream = new ByteArrayInputStream(contentsOfFile.getBytes(StandardCharsets.UTF_8));
+        Response uploadFileDirect = UtilIT.uploadFileDirect(localhostUrl, inputStream);
+        uploadFileDirect.prettyPrint();
+        /*
+        Direct upload to MinIO is failing with errors like this:
+
+        <Error>
+          <Code>SignatureDoesNotMatch</Code>
+          <Message>The request signature we calculated does not match the signature you provided. Check your key and signing method.</Message>
+          <Key>10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5</Key>
+          <BucketName>mybucket</BucketName>
+          <Resource>/mybucket/10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5</Resource>
+          <RequestId>1793915CCC5BC95C</RequestId>
+          <HostId>dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8</HostId>
+        </Error>
+        */
+        uploadFileDirect.then().assertThat().statusCode(200);
+
+        // TODO: Use MD5 or whatever Dataverse is configured for and
+        // actually calculate it.
+        String jsonData = """
+{
+    "description": "My description.",
+    "directoryLabel": "data/subdir1",
+    "categories": [
+        "Data"
+    ],
+    "restrict": "false",
+    "storageIdentifier": "%s",
+    "fileName": "file1.txt",
+    "mimeType": "text/plain",
+    "checksum": {
+        "@type": "SHA-1",
+        "@value": "123456"
+    }
+}
+""".formatted(storageIdentifier);
+
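+        // Untested sketch for the TODO above (assumes Java 17+ for HexFormat and
+        // that SHA-1 matches the installation's configured checksum algorithm):
+        // byte[] digest = java.security.MessageDigest.getInstance("SHA-1")
+        //         .digest(contentsOfFile.getBytes(StandardCharsets.UTF_8));
+        // String sha1Hex = java.util.HexFormat.of().formatHex(digest);
+        // (MessageDigest.getInstance declares NoSuchAlgorithmException, so wrap
+        // it in a try/catch when using it for real.)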
+        // "There was an error when trying to add the new file. File size must be explicitly specified when creating DataFiles with Direct Upload"
+        Response addRemoteFile = UtilIT.addRemoteFile(datasetId.toString(), jsonData, apiToken);
+        addRemoteFile.prettyPrint();
+        addRemoteFile.then().assertThat()
+                .statusCode(200);
+
+        String fileId = JsonPath.from(addRemoteFile.asString()).getString("data.files[0].dataFile.id");
+        Response getfileMetadata = UtilIT.getFileData(fileId, apiToken);
+        getfileMetadata.prettyPrint();
+        getfileMetadata.then().assertThat().statusCode(200);
+
+//        String storageIdentifier = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.storageIdentifier");
+        String keyInDataverse = storageIdentifier.split(":")[2];
+        Assertions.assertEquals(driverId + "://" + BUCKET_NAME + ":" + keyInDataverse, storageIdentifier);
+
+        String keyInS3 = datasetStorageIdentifier + "/" + keyInDataverse;
+        String s3Object = s3localstack.getObjectAsString(BUCKET_NAME, keyInS3);
+        System.out.println("s3Object: " + s3Object);
+
+//        assertEquals(contentsOfFile.trim(), s3Object.trim());
+        assertEquals(contentsOfFile, s3Object);
+
+        System.out.println("direct download...");
+        Response getHeaders = UtilIT.downloadFileNoRedirect(Integer.valueOf(fileId), apiToken);
+        for (Header header : getHeaders.getHeaders()) {
+            System.out.println("direct download header: " + header);
+        }
+        getHeaders.then().assertThat().statusCode(303);
+
+        String urlFromResponse = getHeaders.getHeader("Location");
+        String localhostDownloadUrl = urlFromResponse.replace("localstack", "localhost");
+        String decodedDownloadUrl = null;
+        try {
+            decodedDownloadUrl = URLDecoder.decode(localhostDownloadUrl, StandardCharsets.UTF_8.name());
+        } catch (UnsupportedEncodingException ex) {
+            // cannot happen: UTF-8 is always supported
+        }
+
+        Response downloadFile = UtilIT.downloadFromUrl(decodedDownloadUrl);
+        downloadFile.prettyPrint();
+        downloadFile.then().assertThat().statusCode(200);
+
+        String contentsOfDownloadedFile = downloadFile.getBody().asString();
+        assertEquals(contentsOfFile, contentsOfDownloadedFile);
+
+        Response deleteFile = UtilIT.deleteFileApi(Integer.parseInt(fileId), apiToken);
+        deleteFile.prettyPrint();
+        deleteFile.then().assertThat().statusCode(200);
+
+        AmazonS3Exception expectedException = null;
+        try {
+            s3localstack.getObjectAsString(BUCKET_NAME, keyInS3);
+        } catch (AmazonS3Exception ex) {
+            expectedException = ex;
+        }
+        assertNotNull(expectedException);
+        // 404 because the file has been successfully deleted
+        assertEquals(404, expectedException.getStatusCode());
+
+    }
+
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java
index 125753296a2..6e4fd5b0bb3 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java
@@ -105,7 +105,7 @@ public void testSearchPermisions() throws InterruptedException {
         assertEquals(200, grantUser2AccessOnDataset.getStatusCode());
 
         String searchPart = "id:dataset_" + datasetId1 + "_draft";
-        assertTrue(UtilIT.sleepForSearch(searchPart, apiToken2, "", UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if search exceeds max duration " + searchPart);
+        assertTrue(UtilIT.sleepForSearch(searchPart, apiToken2, "", 1, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if search exceeds max duration " + searchPart);
 
         Response shouldBeVisibleToUser2 = UtilIT.search("id:dataset_" + datasetId1 + "_draft", apiToken2);
         shouldBeVisibleToUser2.prettyPrint();
@@ -589,7 +589,7 @@ public void testDatasetThumbnail() {
overrideThumbnailFail.prettyPrint(); overrideThumbnailFail.then().assertThat() - .body("message", CoreMatchers.equalTo("File is larger than maximum size: 500000.")) + .body("message", CoreMatchers.containsString("File is larger than maximum size:")) /** * @todo We want this to expect 400 (BAD_REQUEST), not 403 * (FORBIDDEN). @@ -793,14 +793,9 @@ public void testNestedSubtree() { Response createDataverseResponse2 = UtilIT.createSubDataverse("subDV" + UtilIT.getRandomIdentifier(), null, apiToken, dataverseAlias); createDataverseResponse2.prettyPrint(); String dataverseAlias2 = UtilIT.getAliasFromResponse(createDataverseResponse2); - + String searchPart = "*"; - - Response searchUnpublishedSubtree = UtilIT.search(searchPart, apiToken, "&subtree="+dataverseAlias); - searchUnpublishedSubtree.prettyPrint(); - searchUnpublishedSubtree.then().assertThat() - .statusCode(OK.getStatusCode()) - .body("data.total_count", CoreMatchers.equalTo(1)); + assertTrue(UtilIT.sleepForSearch(searchPart, apiToken, "&subtree=" + dataverseAlias, 1, UtilIT.GENERAL_LONG_DURATION), "Missing subDV"); Response searchUnpublishedSubtree2 = UtilIT.search(searchPart, apiToken, "&subtree="+dataverseAlias2); searchUnpublishedSubtree2.prettyPrint(); @@ -862,19 +857,9 @@ public void testNestedSubtree() { Response publishDataset = UtilIT.publishDatasetViaNativeApi(datasetPid, "major", apiToken); publishDataset.then().assertThat() .statusCode(OK.getStatusCode()); - - Response searchPublishedSubtreeWDS = UtilIT.search(searchPart, apiToken, "&subtree="+dataverseAlias); - searchPublishedSubtreeWDS.prettyPrint(); - searchPublishedSubtreeWDS.then().assertThat() - .statusCode(OK.getStatusCode()) - .body("data.total_count", CoreMatchers.equalTo(2)); - - Response searchPublishedSubtreeWDS2 = UtilIT.search(searchPart, apiToken, "&subtree="+dataverseAlias2); - searchPublishedSubtreeWDS2.prettyPrint(); - searchPublishedSubtreeWDS2.then().assertThat() - .statusCode(OK.getStatusCode()) - .body("data.total_count", CoreMatchers.equalTo(1)); - + UtilIT.sleepForReindex(datasetPid, apiToken, 5); + assertTrue(UtilIT.sleepForSearch(searchPart, apiToken, "&subtree=" + dataverseAlias, 2, UtilIT.GENERAL_LONG_DURATION), "Did not find 2 children"); + assertTrue(UtilIT.sleepForSearch(searchPart, apiToken, "&subtree=" + dataverseAlias2, 1, UtilIT.GENERAL_LONG_DURATION), "Did not find 1 child"); } //If this test fails it'll fail inconsistently as it tests underlying async role code @@ -906,16 +891,16 @@ public void testCuratorCardDataversePopulation() throws InterruptedException { String subDataverseAlias = "dv" + UtilIT.getRandomIdentifier(); Response createSubDataverseResponse = UtilIT.createSubDataverse(subDataverseAlias, null, apiTokenSuper, parentDataverseAlias); createSubDataverseResponse.prettyPrint(); - //UtilIT.getAliasFromResponse(createSubDataverseResponse); - + Response grantRoleOnDataverseResponse = UtilIT.grantRoleOnDataverse(subDataverseAlias, "curator", "@" + username, apiTokenSuper); grantRoleOnDataverseResponse.then().assertThat() .statusCode(OK.getStatusCode()); - + String searchPart = "*"; + assertTrue(UtilIT.sleepForSearch(searchPart, apiToken, "&subtree="+parentDataverseAlias, 1, UtilIT.GENERAL_LONG_DURATION), "Failed test if search exceeds max duration " + searchPart); + Response searchPublishedSubtreeSuper = UtilIT.search(searchPart, apiTokenSuper, "&subtree="+parentDataverseAlias); - assertTrue(UtilIT.sleepForSearch(searchPart, apiToken, "&subtree="+parentDataverseAlias, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if search 
exceeds max duration " + searchPart); searchPublishedSubtreeSuper.prettyPrint(); searchPublishedSubtreeSuper.then().assertThat() .statusCode(OK.getStatusCode()) @@ -968,7 +953,7 @@ public void testSubtreePermissions() { .statusCode(OK.getStatusCode()); // Wait a little while for the index to pick up the datasets, otherwise timing issue with searching for it. - UtilIT.sleepForReindex(datasetId2.toString(), apiToken, 2); + UtilIT.sleepForReindex(datasetId2.toString(), apiToken, 3); String identifier = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.identifier"); String identifier2 = JsonPath.from(datasetAsJson2.getBody().asString()).getString("data.identifier"); @@ -1077,6 +1062,8 @@ public void testSubtreePermissions() { .statusCode(OK.getStatusCode()) .body("data.total_count", CoreMatchers.equalTo(2)); + assertTrue(UtilIT.sleepForSearch(searchPart, null, "&subtree=" + dataverseAlias2, 1, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Missing dataset w/no apiKey"); + Response searchPublishedSubtreesNoAPI = UtilIT.search(searchPart, null, "&subtree="+dataverseAlias+"&subtree="+dataverseAlias2); searchPublishedSubtreesNoAPI.prettyPrint(); searchPublishedSubtreesNoAPI.then().assertThat() @@ -1197,12 +1184,12 @@ public void testGeospatialSearch() { .add("multiple", false) .add("typeName", "westLongitude") ) - .add("southLongitude", + .add("southLatitude", Json.createObjectBuilder() .add("value", "42.33661") .add("typeClass", "primitive") .add("multiple", false) - .add("typeName", "southLongitude") + .add("typeName", "southLatitude") ) .add("eastLongitude", Json.createObjectBuilder() @@ -1211,12 +1198,12 @@ public void testGeospatialSearch() { .add("multiple", false) .add("typeName", "eastLongitude") ) - .add("northLongitude", + .add("northLatitude", Json.createObjectBuilder() .add("value", "42.409599") .add("typeClass", "primitive") .add("multiple", false) - .add("typeName", "northLongitude") + .add("typeName", "northLatitude") ) ) ) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java index 39156f1c59b..518431bfa2d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SwordIT.java @@ -462,7 +462,7 @@ public void testCreateAndDeleteDatasetInRoot() { assertNull(attemptToGetFileId); } catch (Exception ex) { System.out.println("We expect an exception here because we can no longer find the file because deleted it: " + ex); - assertTrue(ex.getClass().getName().equals(ArrayIndexOutOfBoundsException.class.getName())); + assertTrue(ex instanceof ArrayIndexOutOfBoundsException); } String newTitle = "A New Hope"; @@ -855,7 +855,7 @@ public void testDeleteFiles() { List oneFileLeftInV2Draft = statement3.getBody().xmlPath().getList("feed.entry.id"); logger.info("Number of files remaining in this post version 1 draft:" + oneFileLeftInV2Draft.size()); assertEquals(1, oneFileLeftInV2Draft.size()); - + UtilIT.sleepForLock(datasetPersistentId, "EditInProgress", apiToken, UtilIT.MAXIMUM_PUBLISH_LOCK_DURATION); Response deleteIndex1b = UtilIT.deleteFile(Integer.parseInt(index1b), apiToken); deleteIndex1b.then().assertThat() .statusCode(NO_CONTENT.getStatusCode()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/ThumbnailsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/ThumbnailsIT.java index 8d5b6d86cd9..834d5dddc8b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/ThumbnailsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/ThumbnailsIT.java 
@@ -1,9 +1,18 @@
 package edu.harvard.iq.dataverse.api;
 
+import edu.harvard.iq.dataverse.util.BundleUtil;
+import io.restassured.path.json.JsonPath;
 import io.restassured.response.Response;
 import org.hamcrest.CoreMatchers;
 import org.junit.jupiter.api.Test;
+import java.util.List;
+
+import static jakarta.ws.rs.core.Response.Status.*;
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
 public class ThumbnailsIT {
 
     @Test
@@ -35,4 +44,67 @@ public void testDatasetThumbnail() {
                 .statusCode(403);
     }
+
+    @Test
+    public void testBadTiffThumbnailFailure() {
+        String goodTiff = "src/test/resources/images/good.tiff";
+        String badTiff = "src/test/resources/images/bad.tiff";
+        // setup dataverse and dataset
+        Response createUser = UtilIT.createRandomUser();
+        createUser.prettyPrint();
+        String username = UtilIT.getUsernameFromResponse(createUser);
+        String apiToken = UtilIT.getApiTokenFromResponse(createUser);
+        UtilIT.makeSuperUser(username);
+
+        Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
+        createDataverseResponse.prettyPrint();
+        String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
+        String pathToJsonFile = "src/test/resources/json/complete-dataset-with-files.json";
+        Response createDatasetResponse = UtilIT.createDatasetViaNativeApi(dataverseAlias, pathToJsonFile, apiToken);
+        createDatasetResponse.prettyPrint();
+        Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse);
+
+        Response datasetAsJson = UtilIT.nativeGet(datasetId, apiToken);
+        datasetAsJson.prettyPrint();
+        String protocol = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.protocol");
+        String authority = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.authority");
+        String identifier = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.identifier");
+        String datasetPersistentId = protocol + ":" + authority + "/" + identifier;
+
+        // check thumbnails are empty
+
+        Response thumbnailCandidatesResponse = UtilIT.showDatasetThumbnailCandidates(datasetPersistentId, apiToken);
+        thumbnailCandidatesResponse.prettyPrint();
+        thumbnailCandidatesResponse.then().assertThat().statusCode(OK.getStatusCode());
+        List images = JsonPath.from(thumbnailCandidatesResponse.getBody().asString()).getList("data");
+        assertTrue(images.size() == 0);
+
+        // upload image files ( 1 good and 1 bad )
+
+        Response uploadResponse = UtilIT.uploadFileViaNative(datasetId.toString(), goodTiff, apiToken);
+        uploadResponse.then().assertThat().statusCode(OK.getStatusCode());
+        uploadResponse = UtilIT.uploadFileViaNative(datasetId.toString(), badTiff, apiToken);
+        uploadResponse.then().assertThat().statusCode(OK.getStatusCode());
+
+        // check thumbnails only contain the 1 good image
+        thumbnailCandidatesResponse = UtilIT.showDatasetThumbnailCandidates(datasetPersistentId, apiToken);
+        thumbnailCandidatesResponse.prettyPrint();
+        thumbnailCandidatesResponse.then().assertThat().statusCode(OK.getStatusCode());
+        images = JsonPath.from(thumbnailCandidatesResponse.getBody().asString()).getList("data");
+        assertTrue(images.size() == 1);
+
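+        // The bad TIFF is expected to drop out of the candidate list because
+        // thumbnail generation fails for it and the failure is flagged on the
+        // DataFile. A hypothetical follow-up could reset the flags via the admin
+        // API added to UtilIT in this same change:
+        // UtilIT.clearThumbnailFailureFlags();
+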
+        // test set logo with badTiff and a tiff that is too large (goodTiff)
+        Response uploadLogoResponse = UtilIT.uploadDatasetLogo(datasetPersistentId, badTiff, apiToken);
+        uploadLogoResponse.prettyPrint();
+        uploadLogoResponse.then().assertThat().statusCode(FORBIDDEN.getStatusCode());
+        uploadLogoResponse.then().assertThat().body("message", equalTo(
+                BundleUtil.getStringFromBundle("datasets.api.thumbnail.nonDatasetFailed")));
+
+        uploadLogoResponse = UtilIT.uploadDatasetLogo(datasetPersistentId, goodTiff, apiToken);
+        uploadLogoResponse.prettyPrint();
+        uploadLogoResponse.then().assertThat().statusCode(FORBIDDEN.getStatusCode());
+        uploadLogoResponse.then().assertThat().body("message", containsString(
+                BundleUtil.getStringFromBundle("datasets.api.thumbnail.fileToLarge", List.of(""))));
+
+    }
 }
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UsersIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UsersIT.java
index 5880b08e5c2..0189ffd6e58 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/UsersIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/UsersIT.java
@@ -8,6 +8,7 @@
 import edu.harvard.iq.dataverse.authorization.DataverseRole;
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.UUID;
 import jakarta.json.Json;
@@ -206,15 +207,13 @@ public void testMergeAccounts(){
         String aliasInOwner = "groupFor" + dataverseAlias;
         String displayName = "Group for " + dataverseAlias;
         String user2identifier = "@" + usernameConsumed;
+        String target2identifier = "@" + targetname;
 
         Response createGroup = UtilIT.createGroup(dataverseAlias, aliasInOwner, displayName, superuserApiToken);
         createGroup.prettyPrint();
         createGroup.then().assertThat()
                 .statusCode(CREATED.getStatusCode());
 
-        String groupIdentifier = JsonPath.from(createGroup.asString()).getString("data.identifier");
-
-        List roleAssigneesToAdd = new ArrayList<>();
-        roleAssigneesToAdd.add(user2identifier);
+        List<String> roleAssigneesToAdd = Arrays.asList(user2identifier, target2identifier);
         Response addToGroup = UtilIT.addToGroup(dataverseAlias, aliasInOwner, roleAssigneesToAdd, superuserApiToken);
         addToGroup.prettyPrint();
         addToGroup.then().assertThat()
diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
index e3a7fd0cfc3..0216859b869 100644
--- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
+++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
@@ -11,6 +11,7 @@
 import jakarta.json.JsonObjectBuilder;
 import jakarta.json.JsonArrayBuilder;
 import jakarta.json.JsonObject;
+
 import static jakarta.ws.rs.core.Response.Status.CREATED;
 
 import java.nio.charset.StandardCharsets;
@@ -48,7 +49,7 @@
 import edu.harvard.iq.dataverse.util.StringUtil;
 
 import java.util.Collections;
-import static org.junit.jupiter.api.Assertions.assertEquals;
+
 import static org.junit.jupiter.api.Assertions.*;
 
 public class UtilIT {
@@ -62,7 +63,8 @@ public class UtilIT {
     private static final String BUILTIN_USER_KEY = "burrito";
     private static final String EMPTY_STRING = "";
     public static final int MAXIMUM_INGEST_LOCK_DURATION = 15;
-    public static final int MAXIMUM_PUBLISH_LOCK_DURATION = 15;
+    public static final int MAXIMUM_PUBLISH_LOCK_DURATION = 20;
+    public static final int GENERAL_LONG_DURATION = 45; //Useful when multiple adds/publishes, etc. all get done in sequence
     public static final int MAXIMUM_IMPORT_DURATION = 1;
 
     private static SwordConfigurationImpl swordConfiguration = new SwordConfigurationImpl();
@@ -223,7 +225,19 @@ public static Response validateDataFileHashValue(String fileId, String apiToken
                 .post("/api/admin/validateDataFileHashValue/" + fileId + "?key=" + apiToken);
         return response;
     }
-
+
+    public static Response
clearThumbnailFailureFlags() { + Response response = given() + .delete("/api/admin/clearThumbnailFailureFlag"); + return response; + } + + public static Response clearThumbnailFailureFlag(long fileId) { + Response response = given() + .delete("/api/admin/clearThumbnailFailureFlag/" + fileId); + return response; + } + private static String getAuthenticatedUserAsJsonString(String persistentUserId, String firstName, String lastName, String authenticationProviderId, String identifier) { JsonObjectBuilder builder = Json.createObjectBuilder(); builder.add("authenticationProviderId", authenticationProviderId); @@ -407,6 +421,23 @@ static Response getGuestbookResponses(String dataverseAlias, Long guestbookId, S return requestSpec.get("/api/dataverses/" + dataverseAlias + "/guestbookResponses/"); } + static Response getCollectionSchema(String dataverseAlias, String apiToken) { + Response getCollectionSchemaResponse = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json") + .get("/api/dataverses/" + dataverseAlias + "/datasetSchema"); + return getCollectionSchemaResponse; + } + + static Response validateDatasetJson(String dataverseAlias, String datasetJson, String apiToken) { + Response getValidateDatasetJsonResponse = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(datasetJson) + .contentType("application/json") + .post("/api/dataverses/" + dataverseAlias + "/validateDatasetJson"); + return getValidateDatasetJsonResponse; + } + static Response createRandomDatasetViaNativeApi(String dataverseAlias, String apiToken) { return createRandomDatasetViaNativeApi(dataverseAlias, apiToken, false); } @@ -618,6 +649,21 @@ static Response setMetadataBlocks(String dataverseAlias, JsonArrayBuilder blocks .post("/api/dataverses/" + dataverseAlias + "/metadatablocks"); } + static Response listMetadataBlocks(String dataverseAlias, boolean onlyDisplayedOnCreate, boolean returnDatasetFieldTypes, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .queryParam("onlyDisplayedOnCreate", onlyDisplayedOnCreate) + .queryParam("returnDatasetFieldTypes", returnDatasetFieldTypes) + .get("/api/dataverses/" + dataverseAlias + "/metadatablocks"); + } + + static Response listMetadataBlocks(boolean onlyDisplayedOnCreate, boolean returnDatasetFieldTypes) { + return given() + .queryParam("onlyDisplayedOnCreate", onlyDisplayedOnCreate) + .queryParam("returnDatasetFieldTypes", returnDatasetFieldTypes) + .get("/api/metadatablocks"); + } + static Response getMetadataBlock(String block) { return given() .get("/api/metadatablocks/" + block); @@ -1055,11 +1101,23 @@ static Response getFileMetadata(String fileIdOrPersistentId, String optionalForm .urlEncodingEnabled(false) .get("/api/access/datafile/" + idInPath + "/metadata" + optionalFormatInPath + optionalQueryParam); } - - static Response getFileData(String fileId, String apiToken) { + + static Response getFileData(String fileId, String apiToken) { return given() .header(API_TOKEN_HTTP_HEADER, apiToken) - .get("/api/files/" + fileId ); + .get("/api/files/" + fileId); + } + + static Response getFileData(String fileId, String apiToken, String datasetVersionId) { + return getFileData(fileId, apiToken, datasetVersionId, false, false); + } + + static Response getFileData(String fileId, String apiToken, String datasetVersionId, boolean includeDeaccessioned, boolean returnDatasetVersion) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .queryParam("includeDeaccessioned", includeDeaccessioned) + 
.queryParam("returnDatasetVersion", returnDatasetVersion) + .get("/api/files/" + fileId + "/versions/" + datasetVersionId); } static Response testIngest(String fileName, String fileType) { @@ -1437,7 +1495,7 @@ static Response getDatasetVersion(String persistentId, String versionNumber, Str return getDatasetVersion(persistentId, versionNumber, apiToken, false, false); } - static Response getDatasetVersion(String persistentId, String versionNumber, String apiToken, boolean skipFiles, boolean includeDeaccessioned) { + static Response getDatasetVersion(String persistentId, String versionNumber, String apiToken, boolean excludeFiles, boolean includeDeaccessioned) { return given() .header(API_TOKEN_HTTP_HEADER, apiToken) .queryParam("includeDeaccessioned", includeDeaccessioned) @@ -1445,7 +1503,32 @@ static Response getDatasetVersion(String persistentId, String versionNumber, Str + versionNumber + "?persistentId=" + persistentId - + (skipFiles ? "&includeFiles=false" : "")); + + (excludeFiles ? "&excludeFiles=true" : "")); + } + + static Response getDatasetWithOwners(String persistentId, String apiToken, boolean returnOwners) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/datasets/:persistentId/" + + "?persistentId=" + + persistentId + + (returnOwners ? "&returnOwners=true" : "")); + } + + static Response getFileWithOwners(String datafileId, String apiToken, boolean returnOwners) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/files/" + + datafileId + + (returnOwners ? "/?returnOwners=true" : "")); + } + + static Response getDataverseWithOwners(String alias, String apiToken, boolean returnOwners) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/dataverses/" + + alias + + (returnOwners ? 
"/?returnOwners=true" : "")); } static Response getMetadataBlockFromDatasetVersion(String persistentId, String versionNumber, String metadataBlock, String apiToken) { @@ -1454,17 +1537,28 @@ static Response getMetadataBlockFromDatasetVersion(String persistentId, String v .get("/api/datasets/:persistentId/versions/" + DS_VERSION_LATEST_PUBLISHED + "/metadata/citation?persistentId=" + persistentId); } + @Deprecated static Response makeSuperUser(String username) { Response response = given().post("/api/admin/superuser/" + username); return response; } + static Response setSuperuserStatus(String username, Boolean isSuperUser) { + Response response = given().body(isSuperUser).put("/api/admin/superuser/" + username); + return response; + } + static Response reindexDataset(String persistentId) { Response response = given() .get("/api/admin/index/dataset?persistentId=" + persistentId); return response; } + static Response indexClearDataset(Integer datasetId) { + return given() + .delete("/api/admin/index/datasets/"+datasetId); + } + static Response reindexDataverse(String dvId) { Response response = given() .get("/api/admin/index/dataverses/" + dvId); @@ -1810,15 +1904,15 @@ static Response getDatasetVersions(String idOrPersistentId, String apiToken) { return getDatasetVersions(idOrPersistentId, apiToken, false); } - static Response getDatasetVersions(String idOrPersistentId, String apiToken, boolean skipFiles) { - return getDatasetVersions(idOrPersistentId, apiToken, null, null, skipFiles); + static Response getDatasetVersions(String idOrPersistentId, String apiToken, boolean excludeFiles) { + return getDatasetVersions(idOrPersistentId, apiToken, null, null, excludeFiles); } static Response getDatasetVersions(String idOrPersistentId, String apiToken, Integer offset, Integer limit) { return getDatasetVersions(idOrPersistentId, apiToken, offset, limit, false); } - static Response getDatasetVersions(String idOrPersistentId, String apiToken, Integer offset, Integer limit, boolean skipFiles) { + static Response getDatasetVersions(String idOrPersistentId, String apiToken, Integer offset, Integer limit, boolean excludeFiles) { logger.info("Getting Dataset Versions"); String idInPath = idOrPersistentId; // Assume it's a number. String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. 
@@ -1826,11 +1920,11 @@ static Response getDatasetVersions(String idOrPersistentId, String apiToken, Int idInPath = ":persistentId"; optionalQueryParam = "?persistentId=" + idOrPersistentId; } - if (skipFiles) { + if (excludeFiles) { if ("".equals(optionalQueryParam)) { - optionalQueryParam = "?includeFiles=false"; + optionalQueryParam = "?excludeFiles=true"; } else { - optionalQueryParam = optionalQueryParam.concat("&includeFiles=false"); + optionalQueryParam = optionalQueryParam.concat("&excludeFiles=true"); } } if (offset != null) { @@ -1852,6 +1946,7 @@ static Response getDatasetVersions(String idOrPersistentId, String apiToken, Int requestSpecification = given() .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); } + return requestSpecification.get("/api/datasets/" + idInPath + "/versions" + optionalQueryParam); } @@ -2036,7 +2131,7 @@ static Response indexClear() { return given() .get("/api/admin/index/clear"); } - + static Response index() { return given() .get("/api/admin/index"); @@ -2309,6 +2404,21 @@ static Response getExternalToolsForDataset(String idOrPersistentIdOfDataset, Str } return requestSpecification.get("/api/admin/test/datasets/" + idInPath + "/externalTools?type=" + type + optionalQueryParam); } + + static Response getExternalToolForDatasetById(String idOrPersistentIdOfDataset, String type, String apiToken, String toolId) { + String idInPath = idOrPersistentIdOfDataset; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. + if (!NumberUtils.isCreatable(idOrPersistentIdOfDataset)) { + idInPath = ":persistentId"; + optionalQueryParam = "&persistentId=" + idOrPersistentIdOfDataset; + } + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("/api/admin/test/datasets/" + idInPath + "/externalTool/" + toolId + "?type=" + type + optionalQueryParam); + } static Response getExternalToolsForFile(String idOrPersistentIdOfFile, String type, String apiToken) { String idInPath = idOrPersistentIdOfFile; // Assume it's a number. @@ -2324,6 +2434,21 @@ static Response getExternalToolsForFile(String idOrPersistentIdOfFile, String ty } return requestSpecification.get("/api/admin/test/files/" + idInPath + "/externalTools?type=" + type + optionalQueryParam); } + + static Response getExternalToolForFileById(String idOrPersistentIdOfFile, String type, String apiToken, String toolId) { + String idInPath = idOrPersistentIdOfFile; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. 
+ if (!NumberUtils.isCreatable(idOrPersistentIdOfFile)) { + idInPath = ":persistentId"; + optionalQueryParam = "&persistentId=" + idOrPersistentIdOfFile; + } + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("/api/admin/test/files/" + idInPath + "/externalTool/" + toolId + "?type=" + type + optionalQueryParam); + } static Response submitFeedback(JsonObjectBuilder job) { return given() @@ -2361,6 +2486,71 @@ static Response deleteStorageSite(long storageSiteId) { .delete("/api/admin/storageSites/" + storageSiteId); } + static Response listStorageDrivers(String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/admin/dataverse/storageDrivers"); + } + + static Response getStorageDriver(String dvAlias, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/admin/dataverse/" + dvAlias + "/storageDriver"); + } + + static Response setStorageDriver(String dvAlias, String label, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(label) + .put("/api/admin/dataverse/" + dvAlias + "/storageDriver"); + } + + static Response getUploadUrls(String idOrPersistentIdOfDataset, long sizeInBytes, String apiToken) { + String idInPath = idOrPersistentIdOfDataset; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. + if (!NumberUtils.isCreatable(idOrPersistentIdOfDataset)) { + idInPath = ":persistentId"; + optionalQueryParam = "&persistentId=" + idOrPersistentIdOfDataset; + } + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("/api/datasets/" + idInPath + "/uploadurls?size=" + sizeInBytes + optionalQueryParam); + } + + /** + * If you set dataverse.files.localstack1.disable-tagging=true you will see + * an error like below. + * + * To avoid it, don't send the x-amz-tagging header. + */ + /* + + AccessDenied + There were headers present in the request which were not signed + 25ff2bb0-13c7-420e-8ae6-3d92677e4bd9 + 9Gjjt1m+cjU4OPvX9O9/8RuvnG41MRb/18Oux2o5H5MY7ISNTlXN+Dz9IG62/ILVxhAGI0qyPfg= + x-amz-tagging + + */ + static Response uploadFileDirect(String url, InputStream inputStream) { + return given() + .header("x-amz-tagging", "dv-state=temp") + .body(inputStream) + .put(url); + } + + static Response downloadFileNoRedirect(Integer fileId, String apiToken) { + return given().when().redirects().follow(false) + .get("/api/access/datafile/" + fileId + "?key=" + apiToken); + } + + static Response downloadFromUrl(String url) { + return given().get(url); + } + static Response metricsDataversesToMonth(String yyyymm, String queryParams) { String optionalYyyyMm = ""; if (yyyymm != null) { @@ -2412,6 +2602,25 @@ static Response metricsDownloadsToMonth(String yyyymm, String queryParams) { RequestSpecification requestSpecification = given(); return requestSpecification.get("/api/info/metrics/downloads/toMonth" + optionalYyyyMm + optionalQueryParams); } + + static Response metricsAccountsToMonth(String yyyymm, String queryParams) { + String optionalQueryParams = ""; + if (queryParams != null) { + optionalQueryParams = "?" 
+ queryParams; + } + RequestSpecification requestSpecification = given(); + return requestSpecification.get("/api/info/metrics/accounts/toMonth/" + yyyymm + optionalQueryParams); + } + + static Response metricsAccountsTimeSeries(String mediaType, String queryParams) { + String optionalQueryParams = ""; + if (queryParams != null) { + optionalQueryParams = "?" + queryParams; + } + RequestSpecification requestSpecification = given(); + requestSpecification.contentType(mediaType); + return requestSpecification.get("/api/info/metrics/accounts/monthly" + optionalQueryParams); + } static Response metricsDataversesPastDays(String days, String queryParams) { String optionalQueryParams = ""; @@ -2449,6 +2658,15 @@ static Response metricsDownloadsPastDays(String days, String queryParams) { return requestSpecification.get("/api/info/metrics/downloads/pastDays/" + days + optionalQueryParams); } + static Response metricsAccountsPastDays(String days, String queryParams) { + String optionalQueryParams = ""; + if (queryParams != null) { + optionalQueryParams = "?" + queryParams; + } + RequestSpecification requestSpecification = given(); + return requestSpecification.get("/api/info/metrics/accounts/pastDays/" + days + optionalQueryParams); + } + static Response metricsDataversesByCategory(String queryParams) { String optionalQueryParams = ""; if (queryParams != null) { @@ -2628,6 +2846,13 @@ static boolean sleepForReindex(String idOrPersistentId, String apiToken, int dur i = repeats + 1; } } while ((i <= repeats) && stale); + try { + Thread.sleep(1000); //Current autoSoftIndexTime - which adds a delay to when the new docs are visible + i++; + } catch (InterruptedException ex) { + Logger.getLogger(UtilIT.class.getName()).log(Level.SEVERE, null, ex); + i = repeats + 1; + } System.out.println("Waited " + (i * (sleepStep / 1000.0)) + " seconds"); return i <= repeats; @@ -2683,10 +2908,15 @@ static Boolean sleepForDeadlock(int duration) { //Helper function that returns true if a given search returns a non-zero response within a fixed time limit // a given duration returns false if still zero results after given duration - static Boolean sleepForSearch(String searchPart, String apiToken, String subTree, int duration) { + static Boolean sleepForSearch(String searchPart, String apiToken, String subTree, int count, int duration) { Response searchResponse = UtilIT.search(searchPart, apiToken, subTree); + //Leave early if search isn't working + if(searchResponse.statusCode()!=200) { + logger.warning("Non-200 status in sleepForSearch: " + searchResponse.statusCode()); + return false; + } int i = 0; do { try { @@ -2699,8 +2929,8 @@ static Boolean sleepForSearch(String searchPart, String apiToken, String subTre } catch (InterruptedException ex) { Logger.getLogger(UtilIT.class.getName()).log(Level.SEVERE, null, ex); } - } while (UtilIT.getSearchCountFromResponse(searchResponse) == 0); - + } while (UtilIT.getSearchCountFromResponse(searchResponse) != count); + logger.info("Waited " + i + " seconds in sleepForSearch"); return i <= duration; } @@ -2955,6 +3185,19 @@ static Response makeDataCountUpdateCitationsForDataset(String idOrPersistentIdOf return requestSpecification.post("/api/admin/makeDataCount/" + idInPath + "/updateCitationsForDataset"+ optionalQueryParam); } + static Response makeDataCountGetProcessingState(String yearMonth) { + RequestSpecification requestSpecification = given(); + return requestSpecification.get("/api/admin/makeDataCount/" + yearMonth + "/processingState"); + } + static Response 
makeDataCountUpdateProcessingState(String yearMonth, String state) { + RequestSpecification requestSpecification = given(); + return requestSpecification.post("/api/admin/makeDataCount/" + yearMonth + "/processingState?state=" + state); + } + static Response makeDataCountDeleteProcessingState(String yearMonth) { + RequestSpecification requestSpecification = given(); + return requestSpecification.delete("/api/admin/makeDataCount/" + yearMonth + "/processingState"); + } + static Response editDDI(String body, String fileId, String apiToken) { if (apiToken == null) { apiToken = ""; @@ -2983,6 +3226,31 @@ static Response findDataverseStorageSize(String dataverseId, String apiToken) { .get("/api/dataverses/" + dataverseId + "/storagesize"); } + static Response checkCollectionQuota(String collectionId, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/dataverses/" + collectionId + "/storage/quota"); + } + + static Response setCollectionQuota(String collectionId, long allocatedSize, String apiToken) { + Response response = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .post("/api/dataverses/" + collectionId + "/storage/quota/" + allocatedSize); + return response; + } + + static Response disableCollectionQuota(String collectionId, String apiToken) { + Response response = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .delete("/api/dataverses/" + collectionId + "/storage/quota"); + return response; + } + + static Response checkCollectionStorageUse(String collectionId, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/dataverses/" + collectionId + "/storage/use"); + } /** * Determine the "payload" storage size of a dataverse @@ -3045,6 +3313,33 @@ static Response addDataverseRole(String pathToJsonFile, String dvAlias, String a .post("/api/roles?dvo="+dvAlias); return addBannerMessageResponse; } + + static Response addFeaturedDataverse (String dvAlias, String featuredDvAlias, String apiToken) { + + String jsonString = "[\"" + featuredDvAlias + "\"]"; + + Response addFeaturedDataverseResponse = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(jsonString) + .post("/api/dataverses/"+dvAlias+"/featured/"); + return addFeaturedDataverseResponse; + } + + static Response deleteFeaturedDataverses (String dvAlias, String apiToken) { + + Response deleteFeaturedDataversesResponse = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .delete("/api/dataverses/"+dvAlias+"/featured/"); + return deleteFeaturedDataversesResponse; + } + + static Response getFeaturedDataverses (String dvAlias, String apiToken) { + + Response deleteFeaturedDataversesResponse = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/dataverses/"+dvAlias+"/featured/"); + return deleteFeaturedDataversesResponse; + } static Response deleteDataverseRole( String roleAlias, String apiToken) { @@ -3092,20 +3387,6 @@ static Response deleteBannerMessage(Long id) { return deleteBannerMessageResponse; } - static String getBannerMessageIdFromResponse(String getBannerMessagesResponse) { - StringReader rdr = new StringReader(getBannerMessagesResponse); - JsonObject json = Json.createReader(rdr).readObject(); - - for (JsonObject obj : json.getJsonArray("data").getValuesAs(JsonObject.class)) { - String message = obj.getString("displayValue"); - if (message.equals("Banner Message For Deletion")) { - return obj.getJsonNumber("id").toString(); - } - } - - return "0"; - } - static Response getDatasetJsonLDMetadata(Integer datasetId, String 
apiToken) { Response response = given() .header(API_TOKEN_HTTP_HEADER, apiToken) @@ -3345,14 +3626,31 @@ static Response getPrivateUrlDatasetVersionCitation(String privateUrlToken) { return response; } - static Response getDatasetVersionCitation(Integer datasetId, String version, String apiToken) { + static Response getDatasetVersionCitation(Integer datasetId, String version, boolean includeDeaccessioned, String apiToken) { Response response = given() .header(API_TOKEN_HTTP_HEADER, apiToken) .contentType("application/json") + .queryParam("includeDeaccessioned", includeDeaccessioned) .get("/api/datasets/" + datasetId + "/versions/" + version + "/citation"); return response; } + static Response getFileCitation(Integer fileId, String datasetVersion, String apiToken) { + Boolean includeDeaccessioned = null; + return getFileCitation(fileId, datasetVersion, includeDeaccessioned, apiToken); + } + + static Response getFileCitation(Integer fileId, String datasetVersion, Boolean includeDeaccessioned, String apiToken) { + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification.header(API_TOKEN_HTTP_HEADER, apiToken); + } + if (includeDeaccessioned != null) { + requestSpecification.queryParam("includeDeaccessioned", includeDeaccessioned); + } + return requestSpecification.get("/api/files/" + fileId + "/versions/" + datasetVersion + "/citation"); + } + static Response getVersionFiles(Integer datasetId, String version, Integer limit, @@ -3441,6 +3739,12 @@ static Response getUserPermissionsOnDataset(String datasetId, String apiToken) { .get("/api/datasets/" + datasetId + "/userPermissions"); } + static Response getCanDownloadAtLeastOneFile(String datasetId, String versionId, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/datasets/" + datasetId + "/versions/" + versionId + "/canDownloadAtLeastOneFile"); + } + static Response createFileEmbargo(Integer datasetId, Integer fileId, String dateAvailable, String apiToken) { JsonObjectBuilder jsonBuilder = Json.createObjectBuilder(); jsonBuilder.add("dateAvailable", dateAvailable); @@ -3455,6 +3759,20 @@ static Response createFileEmbargo(Integer datasetId, Integer fileId, String date .post("/api/datasets/" + datasetId + "/files/actions/:set-embargo"); } + static Response createFileRetention(Integer datasetId, Integer fileId, String dateUnavailable, String apiToken) { + JsonObjectBuilder jsonBuilder = Json.createObjectBuilder(); + jsonBuilder.add("dateUnavailable", dateUnavailable); + jsonBuilder.add("reason", "This is a test retention"); + jsonBuilder.add("fileIds", Json.createArrayBuilder().add(fileId)); + String jsonString = jsonBuilder.build().toString(); + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(jsonString) + .contentType("application/json") + .urlEncodingEnabled(false) + .post("/api/datasets/" + datasetId + "/files/actions/:set-retention"); + } + static Response getVersionFileCounts(Integer datasetId, String version, String contentType, @@ -3527,17 +3845,38 @@ static Response getHasBeenDeleted(String dataFileId, String apiToken) { .get("/api/files/" + dataFileId + "/hasBeenDeleted"); } - static Response deaccessionDataset(Integer datasetId, String version, String deaccessionReason, String deaccessionForwardURL, String apiToken) { + static Response deaccessionDataset(int datasetId, String version, String deaccessionReason, String deaccessionForwardURL, String apiToken) { + return deaccessionDataset(String.valueOf(datasetId), version, 
deaccessionReason, deaccessionForwardURL, apiToken); + } + + static Response deaccessionDataset(String datasetIdOrPersistentId, String versionId, String deaccessionReason, String deaccessionForwardURL, String apiToken) { + + String idInPath = datasetIdOrPersistentId; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. + if (!NumberUtils.isCreatable(datasetIdOrPersistentId)) { + idInPath = ":persistentId"; + optionalQueryParam = "?persistentId=" + datasetIdOrPersistentId; + } + JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); jsonObjectBuilder.add("deaccessionReason", deaccessionReason); if (deaccessionForwardURL != null) { jsonObjectBuilder.add("deaccessionForwardURL", deaccessionForwardURL); } + String jsonString = jsonObjectBuilder.build().toString(); + StringBuilder query = new StringBuilder() + .append("/api/datasets/") + .append(idInPath) + .append("/versions/") + .append(versionId) + .append("/deaccession") + .append(optionalQueryParam); + return given() - .header(API_TOKEN_HTTP_HEADER, apiToken) - .body(jsonString) - .post("/api/datasets/" + datasetId + "/versions/" + version + "/deaccession"); + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(jsonString) + .post(query.toString()); } static Response getDownloadSize(Integer datasetId, @@ -3574,4 +3913,77 @@ static Response getDownloadSize(Integer datasetId, return requestSpecification .get("/api/datasets/" + datasetId + "/versions/" + version + "/downloadsize"); } + + static Response downloadTmpFile(String fullyQualifiedPathToFile, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .get("/api/admin/downloadTmpFile?fullyQualifiedPathToFile=" + fullyQualifiedPathToFile); + } + + static Response setDatasetStorageDriver(Integer datasetId, String driverLabel, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(driverLabel) + .put("/api/datasets/" + datasetId + "/storageDriver"); + } + + + //Globus Store related - not currently used + + static Response getDatasetGlobusUploadParameters(Integer datasetId, String locale, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json") + .get("/api/datasets/" + datasetId + "/globusUploadParameters?locale=" + locale); + } + + static Response getDatasetGlobusDownloadParameters(Integer datasetId, String locale, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json") + .get("/api/datasets/" + datasetId + "/globusDownloadParameters?locale=" + locale); + } + + static Response requestGlobusDownload(Integer datasetId, JsonObject body, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(body) + .contentType("application/json") + .post("/api/datasets/" + datasetId + "/requestGlobusDownload"); + } + + static Response requestGlobusUploadPaths(Integer datasetId, JsonObject body, String apiToken) { + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(body.toString()) + .contentType("application/json") + .post("/api/datasets/" + datasetId + "/requestGlobusUploadPaths"); + } + + public static Response updateDataverseInputLevels(String dataverseAlias, String[] inputLevelNames, boolean[] requiredInputLevels, boolean[] includedInputLevels, String apiToken) { + JsonArrayBuilder inputLevelsArrayBuilder = Json.createArrayBuilder(); + for (int i = 0; i < inputLevelNames.length; i++) { + 
inputLevelsArrayBuilder.add(createInputLevelObject(inputLevelNames[i], requiredInputLevels[i], includedInputLevels[i])); + } + return given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .body(inputLevelsArrayBuilder.build().toString()) + .contentType(ContentType.JSON) + .put("/api/dataverses/" + dataverseAlias + "/inputLevels"); + } + + private static JsonObjectBuilder createInputLevelObject(String name, boolean required, boolean include) { + return Json.createObjectBuilder() + .add("datasetFieldTypeName", name) + .add("required", required) + .add("include", include); + } + + public static Response getOpenAPI(String accept, String format) { + Response response = given() + .header("Accept", accept) + .queryParam("format", format) + .get("/openapi"); + return response; + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBeanTest.java index 672d7563669..3a63371d7a8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/OAuth2LoginBackingBeanTest.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.authorization.providers.oauth2; import edu.harvard.iq.dataverse.DataverseSession; +import edu.harvard.iq.dataverse.UserServiceBean; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.UserRecordIdentifier; import edu.harvard.iq.dataverse.authorization.providers.oauth2.impl.GitHubOAuth2APTest; @@ -48,6 +49,7 @@ class OAuth2LoginBackingBeanTest { @Mock AuthenticationServiceBean authenticationServiceBean; @Mock SystemConfig systemConfig; + @Mock UserServiceBean userService; Clock constantClock = Clock.fixed(Instant.now(), ZoneId.systemDefault()); @@ -70,6 +72,7 @@ void setUp() { this.loginBackingBean.clock = constantClock; this.loginBackingBean.authenticationSvc = this.authenticationServiceBean; this.loginBackingBean.systemConfig = this.systemConfig; + this.loginBackingBean.userService = this.userService; lenient().when(this.authenticationServiceBean.getOAuth2Provider(testIdp.getId())).thenReturn(testIdp); } @@ -178,6 +181,7 @@ void existingUser() throws Exception { // also fake the result of the lookup in the auth service doReturn(userIdentifier).when(userRecord).getUserRecordIdentifier(); doReturn(user).when(authenticationServiceBean).lookupUser(userIdentifier); + doReturn(user).when(userService).updateLastLogin(user); // WHEN (& then) // capture the redirect target from the faces context diff --git a/src/test/java/edu/harvard/iq/dataverse/branding/BrandingUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/branding/BrandingUtilTest.java index 2b526b8a449..0a6d89ed490 100644 --- a/src/test/java/edu/harvard/iq/dataverse/branding/BrandingUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/branding/BrandingUtilTest.java @@ -41,8 +41,8 @@ public static void setupMocks() { BrandingUtil.injectServices(dataverseSvc, settingsSvc); // initial values (needed here for other tests where this method is reused!) 
- Mockito.when(settingsSvc.getValueForKey(SettingsServiceBean.Key.InstallationName)).thenReturn(DEFAULT_NAME); - Mockito.when(dataverseSvc.getRootDataverseName()).thenReturn(DEFAULT_NAME); + Mockito.lenient().when(settingsSvc.getValueForKey(SettingsServiceBean.Key.InstallationName)).thenReturn(DEFAULT_NAME); + Mockito.lenient().when(dataverseSvc.getRootDataverseName()).thenReturn(DEFAULT_NAME); } /** @@ -50,7 +50,7 @@ public static void setupMocks() { * @param installationName */ public static void setInstallationName(String installationName) { - Mockito.when(settingsSvc.getValueForKey(SettingsServiceBean.Key.InstallationName)).thenReturn(installationName); + Mockito.lenient().when(settingsSvc.getValueForKey(SettingsServiceBean.Key.InstallationName)).thenReturn(installationName); } /** @@ -58,7 +58,7 @@ public static void setInstallationName(String installationName) { * @param rootDataverseName */ public static void setRootDataverseName(String rootDataverseName) { - Mockito.when(dataverseSvc.getRootDataverseName()).thenReturn(rootDataverseName); + Mockito.lenient().when(dataverseSvc.getRootDataverseName()).thenReturn(rootDataverseName); } /** diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java index 1ff914adff9..f7ce061fb24 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/DataAccessTest.java @@ -59,4 +59,24 @@ void testCreateNewStorageIO_createsFileAccessIObyDefault() throws IOException { StorageIO storageIo = DataAccess.createNewStorageIO(dataset, "valid-tag"); assertTrue(storageIo.getClass().equals(FileAccessIO.class)); } + + @Test + void testGetLocationFromStorageId() { + Dataset d = new Dataset(); + d.setAuthority("10.5072"); + d.setIdentifier("FK2/ABCDEF"); + assertEquals("s3://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece", + DataAccess.getLocationFromStorageId("s3://18b39722140-50eb7d3c5ece", d)); + assertEquals("10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece", + DataAccess.getLocationFromStorageId("18b39722140-50eb7d3c5ece", d)); + + } + + @Test + void testGetStorageIdFromLocation() { + assertEquals("file://18b39722140-50eb7d3c5ece", + DataAccess.getStorageIdFromLocation("file://10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece")); + assertEquals("s3://18b39722140-50eb7d3c5ece", + DataAccess.getStorageIdFromLocation("s3://bucketname:10.5072/FK2/ABCDEF/18b39722140-50eb7d3c5ece")); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java new file mode 100644 index 00000000000..d173f65757f --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/GlobusOverlayAccessIOTest.java @@ -0,0 +1,151 @@ +/* + * SPDX-License-Identifier: Apache 2.0 + */ +package edu.harvard.iq.dataverse.dataaccess; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import static org.junit.jupiter.api.Assertions.*; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import 
org.mockito.junit.jupiter.MockitoSettings;
+import org.mockito.quality.Strictness;
+import java.io.IOException;
+import java.nio.file.Paths;
+
+@ExtendWith(MockitoExtension.class)
+@MockitoSettings(strictness = Strictness.STRICT_STUBS)
+public class GlobusOverlayAccessIOTest {
+
+    @Mock
+    private Dataset dataset;
+    private DataFile mDatafile;
+    private DataFile rDatafile;
+    private String baseStoreId1 = "182ad2bda2f-c3508e719076";
+    private String baseStoreId2 = "182ad2bda2f-c3508e719077";
+    private String logoPath = "d7c42580-6538-4605-9ad8-116a61982644/hdc1/image002.mrc";
+    private String authority = "10.5072";
+    private String identifier = "F2ABCDEF";
+
+    @BeforeAll
+    public static void setUp() {
+        // Base Store
+        System.setProperty("dataverse.files.base.type", DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER);
+        System.setProperty("dataverse.files.base.label", "default");
+        System.setProperty("dataverse.files.base.directory", "/tmp/files");
+
+        // Managed Globus Store
+
+        // Nonsense endpoint/paths
+        System.setProperty("dataverse.files.globusm." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH,
+                "d7c42580-6538-4605-9ad8-116a61982644/hdc1");
+        // Nonsense value of the right form
+        System.setProperty("dataverse.files.globusm.globus-token",
+                "NzM2NTQxMDMtOTg1Yy00NDgzLWE1MTYtYTJlNDk0ZmI3MDhkOkpJZGZaZGxMZStQNUo3MTRIMDY2cDh6YzIrOXI2RmMrbFR6UG0zcSsycjA9");
+        System.setProperty("dataverse.files.globusm.remote-store-name", "GlobusEndpoint1");
+        System.setProperty("dataverse.files.globusm.type", "globus");
+        System.setProperty("dataverse.files.globusm.managed", "true");
+        System.setProperty("dataverse.files.globusm.base-store", "base");
+        System.setProperty("dataverse.files.globusm.label", "globusManaged");
+
+        // Remote Store
+        System.setProperty("dataverse.files.globusr.type", "globus");
+        System.setProperty("dataverse.files.globusr.base-store", "base");
+        System.setProperty("dataverse.files.globusr.managed", "false");
+        System.setProperty("dataverse.files.globusr.label", "globusRemote");
+        System.setProperty(
+                "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS,
+                "d7c42580-6538-4605-9ad8-116a61982644/hdc1");
+        System.setProperty("dataverse.files.globusr.remote-store-name", "DemoDataCorp");
+
+    }
+
+    @AfterAll
+    public static void tearDown() {
+        System.clearProperty("dataverse.files.base.type");
+        System.clearProperty("dataverse.files.base.label");
+        System.clearProperty("dataverse.files.base.directory");
+        System.clearProperty("dataverse.files.globusm." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH);
+        System.clearProperty("dataverse.files.globusm.globus-token");
+        System.clearProperty("dataverse.files.globusm.remote-store-name");
+        System.clearProperty("dataverse.files.globusm.type");
+        System.clearProperty("dataverse.files.globusm.managed");
+        System.clearProperty("dataverse.files.globusm.base-store");
+        System.clearProperty("dataverse.files.globusm.label");
+        System.clearProperty("dataverse.files.globusr.type");
+        System.clearProperty("dataverse.files.globusr.base-store");
+        System.clearProperty("dataverse.files.globusr.managed");
+        System.clearProperty("dataverse.files.globusr.label");
+        System.clearProperty(
+                "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS);
+        System.clearProperty("dataverse.files.globusr.remote-store-name");
+    }
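+
+    // The two stores configured above exercise both identifier shapes validated
+    // below. Hypothetical examples:
+    //   managed:  globusm://182ad2bda2f-c3508e719076
+    //   remote:   globusr://182ad2bda2f-c3508e719077//<endpoint-relative-path>
+    // The "//" separates the base store id from the endpoint-relative path in the
+    // remote form, and path traversal ("..") after it must be rejected.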
+ AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS); + System.clearProperty("dataverse.files.globusr.remote-store-name"); + } + + @Test + void testGlobusOverlayIdentifiers() throws IOException { + + dataset = MocksFactory.makeDataset(); + dataset.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL, authority, identifier, "/", + AbstractDOIProvider.DOI_RESOLVER_URL, null)); + mDatafile = MocksFactory.makeDataFile(); + mDatafile.setOwner(dataset); + mDatafile.setStorageIdentifier("globusm://" + baseStoreId1); + + rDatafile = MocksFactory.makeDataFile(); + rDatafile.setOwner(dataset); + rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath); + + assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusm", mDatafile.getStorageIdentifier())); + assertTrue(GlobusOverlayAccessIO.isValidIdentifier("globusr", rDatafile.getStorageIdentifier())); + assertFalse(GlobusOverlayAccessIO.isValidIdentifier("globusm", "globusr://localid//../of/the/hill")); + assertFalse(GlobusOverlayAccessIO.isValidIdentifier("globusr", + rDatafile.getStorageIdentifier().replace("hdc1", ""))); + + // We can read the storageIdentifier and get the driver + assertTrue(mDatafile.getStorageIdentifier() + .startsWith(DataAccess.getStorageDriverFromIdentifier(mDatafile.getStorageIdentifier()))); + assertTrue(rDatafile.getStorageIdentifier() + .startsWith(DataAccess.getStorageDriverFromIdentifier(rDatafile.getStorageIdentifier()))); + + // We can get the driver type from it's ID + assertTrue(DataAccess.getDriverType("globusm").equals(System.getProperty("dataverse.files.globusm.type"))); + assertTrue(DataAccess.getDriverType("globusr").equals(System.getProperty("dataverse.files.globusr.type"))); + + // When we get a StorageIO for the file, it is the right type + StorageIO mStorageIO = DataAccess.getStorageIO(mDatafile); + assertTrue(mStorageIO instanceof GlobusOverlayAccessIO); + StorageIO rStorageIO = DataAccess.getStorageIO(rDatafile); + assertTrue(rStorageIO instanceof GlobusOverlayAccessIO); + + // When we use it, we can get properties like the remote store name + assertTrue(mStorageIO.getRemoteStoreName() + .equals(System.getProperty("dataverse.files.globusm.remote-store-name"))); + assertTrue(rStorageIO.getRemoteStoreName() + .equals(System.getProperty("dataverse.files.globusr.remote-store-name"))); + + // Storage Locations are correct + String mLocation = mStorageIO.getStorageLocation(); + assertEquals("globusm:///" + dataset.getAuthorityForFileStorage() + "/" + dataset.getIdentifierForFileStorage() + + "/" + baseStoreId1, mLocation); + String rLocation = rStorageIO.getStorageLocation(); + assertEquals("globusr://" + baseStoreId2 + "//" + logoPath, rLocation); + + // If we ask for the path for an aux file, it is correct + System.out.println(Paths.get(System.getProperty("dataverse.files.file.directory", "/tmp/files"), authority, + identifier, baseStoreId1 + ".auxobject").toString()); + System.out.println(mStorageIO.getAuxObjectAsPath("auxobject").toString()); + assertTrue(Paths.get(System.getProperty("dataverse.files.base.directory", "/tmp/files"), authority, identifier, + baseStoreId1 + ".auxobject").equals(mStorageIO.getAuxObjectAsPath("auxobject"))); + assertTrue(Paths.get(System.getProperty("dataverse.files.base.directory", "/tmp/files"), authority, identifier, + baseStoreId2 + ".auxobject").equals(rStorageIO.getAuxObjectAsPath("auxobject"))); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java 
b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java index 5affc01aff0..2c0e0a5c6b7 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/RemoteOverlayAccessIOTest.java @@ -4,12 +4,11 @@ */ package edu.harvard.iq.dataverse.dataaccess; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.GlobalIdServiceBean; import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import org.junit.jupiter.api.AfterEach; @@ -51,7 +50,7 @@ public void setUp() { System.setProperty("dataverse.files.file.label", "default"); datafile = MocksFactory.makeDataFile(); dataset = MocksFactory.makeDataset(); - dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, authority, identifier, "/", DOIServiceBean.DOI_RESOLVER_URL, null)); + dataset.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL, authority, identifier, "/", AbstractDOIProvider.DOI_RESOLVER_URL, null)); datafile.setOwner(dataset); datafile.setStorageIdentifier("test://" + baseStoreId + "//" + logoPath); diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOLocalstackIT.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOLocalstackIT.java new file mode 100644 index 00000000000..140b558fc1d --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIOLocalstackIT.java @@ -0,0 +1,153 @@ +package edu.harvard.iq.dataverse.dataaccess; + +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.testing.Tags; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Scanner; +import java.util.UUID; +import static org.junit.jupiter.api.Assertions.assertEquals; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.junit.jupiter.MockitoExtension; +import org.testcontainers.containers.localstack.LocalStackContainer; +import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; +import org.testcontainers.utility.DockerImageName; + +// https://java.testcontainers.org/modules/localstack/ +@Tag(Tags.INTEGRATION_TEST) +@Tag(Tags.USES_TESTCONTAINERS) +@Testcontainers(disabledWithoutDocker = true) +@ExtendWith(MockitoExtension.class) +class S3AccessIOLocalstackIT { + + @BeforeAll + static void setUp() { + System.setProperty(staticFiles + "access-key", localstack.getAccessKey()); + System.setProperty(staticFiles + "secret-key", localstack.getSecretKey()); + System.setProperty(staticFiles + "custom-endpoint-url", 
localstack.getEndpoint().toString());
+        System.setProperty(staticFiles + "custom-endpoint-region", localstack.getRegion());
+        System.setProperty(staticFiles + "bucket-name", bucketName);
+
+        s3 = AmazonS3ClientBuilder
+                .standard()
+                .withEndpointConfiguration(
+                        new AwsClientBuilder.EndpointConfiguration(
+                                localstack.getEndpoint().toString(),
+                                localstack.getRegion()
+                        )
+                )
+                .withCredentials(
+                        new AWSStaticCredentialsProvider(
+                                new BasicAWSCredentials(localstack.getAccessKey(), localstack.getSecretKey())
+                        )
+                )
+                .build();
+        s3.createBucket(bucketName);
+    }
+
+    static final String storageDriverId = "si1";
+    static final String staticFiles = "dataverse.files." + storageDriverId + ".";
+    static final String bucketName = "bucket-" + UUID.randomUUID().toString();
+    static AmazonS3 s3 = null;
+
+    static DockerImageName localstackImage = DockerImageName.parse("localstack/localstack:2.3.2");
+    @Container
+    static LocalStackContainer localstack = new LocalStackContainer(localstackImage)
+            .withServices(S3);
+
+    //new S3AccessIO<>(dvObject, req, storageDriverId);
+    @Test
+    void test1() {
+        DvObject dvObject = new Dataset();
+        dvObject.setProtocol("doi");
+        dvObject.setAuthority("10.5072/FK2");
+        dvObject.setIdentifier("ABC123");
+        DataAccessRequest req = null;
+        S3AccessIO<DvObject> s3AccessIO = new S3AccessIO<>(dvObject, req, storageDriverId);
+        String textIn = "Hello";
+        InputStream inputStream = new ByteArrayInputStream(textIn.getBytes());
+        // Without this temp directory, saveInputStream fails
+        String tempDirPath = "/tmp/dataverse/temp";
+        try {
+            Files.createDirectories(Paths.get(tempDirPath));
+        } catch (IOException ex) {
+            System.out.println("failed to create " + tempDirPath + ": " + ex);
+        }
+        try {
+            s3AccessIO.saveInputStream(inputStream);
+            System.out.println("save complete!");
+        } catch (IOException ex) {
+            System.out.println("saveInputStream exception: " + ex);
+        }
+
+        String textOut = null;
+        try {
+            textOut = new Scanner(s3AccessIO.getInputStream()).useDelimiter("\\A").next();
+        } catch (IOException ex) {
+            // ignored: a read failure leaves textOut null and the assertion below fails
+        }
+        assertEquals(textIn, textOut);
+    }
+
+    // testing a specific constructor
+    @Test
+    void test2() {
+        Dataset dataset = new Dataset();
+        dataset.setProtocol("doi");
+        dataset.setAuthority("10.5072/FK2");
+        dataset.setIdentifier("ABC123");
+        String sid = bucketName + dataset.getAuthorityForFileStorage() + "/"
+                + dataset.getIdentifierForFileStorage() + "/" + FileUtil.generateStorageIdentifier();
+        S3AccessIO s3io = new S3AccessIO(sid, storageDriverId);
+    }
+
+    // just to test this: saveInputStream exception: java.io.IOException: ERROR: s3 not initialised
+    @Test
+    void test3() {
+        DvObject dvObject = new Dataset();
+        dvObject.setProtocol("doi");
+        dvObject.setAuthority("10.5072/FK2");
+        dvObject.setIdentifier("ABC123");
+        DataAccessRequest req = null;
+        AmazonS3 nullAmazonS3 = null;
+        S3AccessIO<DvObject> s3AccessIO = new S3AccessIO<>(dvObject, req, nullAmazonS3, storageDriverId);
+        InputStream inputStream = null;
+        try {
+            s3AccessIO.saveInputStream(inputStream);
+            System.out.println("save complete!");
+        } catch (IOException ex) {
+            System.out.println("saveInputStream exception: " + ex);
+        }
+    }
+
+    @Test
+    void test4() {
+        DvObject dvObject = new DataFile();
+        dvObject.setProtocol("doi");
+        dvObject.setAuthority("10.5072/FK2");
+        dvObject.setIdentifier("ABC123");
+        DataAccessRequest req = null;
+        S3AccessIO<DvObject> s3AccessIO = new S3AccessIO<>(dvObject, req, storageDriverId);
+        InputStream inputStream = null;
+        try {
+            s3AccessIO.saveInputStream(inputStream);
+            System.out.println("save 
complete!"); + } catch (IOException ex) { + System.out.println("saveInputStream exception: " + ex); + } + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java b/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java index 2ed9d18036d..84a241b90f6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/dataaccess/StorageIOTest.java @@ -243,4 +243,16 @@ public void testGenerateVariableHeader() { assertEquals("Random Random\n", instance.generateVariableHeader(dvs)); assertEquals(null, instance.generateVariableHeader(null)); } + + @Test + public void testGetConfigParam() { + System.setProperty("dataverse.files.globus.type", "globus"); + assertEquals("globus", StorageIO.getConfigParamForDriver("globus", StorageIO.TYPE)); + System.clearProperty("dataverse.files.globus.type"); + } + + @Test + public void testGetConfigParamWithDefault() { + assertEquals(DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER, StorageIO.getConfigParamForDriver("globus", AbstractRemoteOverlayAccessIO.BASE_STORE, DataAccess.DEFAULT_STORAGE_DRIVER_IDENTIFIER)); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java b/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java index 99da9198296..f2c03adea20 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/TestCommandContext.java @@ -11,8 +11,7 @@ import edu.harvard.iq.dataverse.engine.command.Command; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; -import edu.harvard.iq.dataverse.pidproviders.FakePidProviderServiceBean; -import edu.harvard.iq.dataverse.pidproviders.PermaLinkPidProviderServiceBean; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.search.IndexBatchServiceBean; import edu.harvard.iq.dataverse.search.IndexServiceBean; @@ -20,6 +19,7 @@ import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.storageuse.StorageUseServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.workflow.WorkflowServiceBean; import java.util.Stack; @@ -121,27 +121,7 @@ public DataverseFieldTypeInputLevelServiceBean fieldTypeInputLevels() { } @Override - public DOIEZIdServiceBean doiEZId() { - return null; - } - - @Override - public DOIDataCiteServiceBean doiDataCite() { - return null; - } - - @Override - public FakePidProviderServiceBean fakePidProvider() { - return null; - } - - @Override - public HandlenetServiceBean handleNet() { - return null; - } - - @Override - public PermaLinkPidProviderServiceBean permaLinkProvider() { + public PidProviderFactoryBean pidProviderFactory() { return null; } @@ -170,6 +150,11 @@ public DatasetLinkingServiceBean dsLinking() { return null; } + @Override + public DatasetFieldServiceBean dsField() { + return null; + } + @Override public AuthenticationServiceBean authentication() { return null; @@ -245,6 +230,16 @@ public ActionLogServiceBean actionLog() { return null; } + @Override + public MetadataBlockServiceBean metadataBlocks() { + return null; + } + + @Override + public StorageUseServiceBean storageUse() { + 
return null; + } + @Override public void beginCommandSequence() { throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreatePrivateUrlCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreatePrivateUrlCommandTest.java index 33f9acd0e1a..508eac46cb4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreatePrivateUrlCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreatePrivateUrlCommandTest.java @@ -2,24 +2,32 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseRoleServiceBean; +import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.RoleAssignment; import edu.harvard.iq.dataverse.authorization.DataverseRole; +import edu.harvard.iq.dataverse.authorization.RoleAssignee; import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser; import edu.harvard.iq.dataverse.engine.TestCommandContext; import edu.harvard.iq.dataverse.engine.TestDataverseEngine; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import edu.harvard.iq.dataverse.search.IndexResponse; +import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; import java.util.ArrayList; import java.util.List; +import java.util.concurrent.Future; + import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.*; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; public class CreatePrivateUrlCommandTest { @@ -73,6 +81,10 @@ public RoleAssignment save(RoleAssignment assignment) { // no-op return assignment; } + @Override + public List directRoleAssignments(RoleAssignee roas, DvObject dvo) { + return List.of(); + } }; } @@ -89,6 +101,16 @@ public String getDataverseSiteUrl() { }; } + + @Override + public SolrIndexServiceBean solrIndex() { + return new SolrIndexServiceBean(){ + @Override + public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) { + return null; + } + }; + } } ); diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommandTest.java index 23cc4547bc4..68c44764dff 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/ReturnDatasetToAuthorCommandTest.java @@ -21,6 +21,9 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.mocks.MocksFactory; import edu.harvard.iq.dataverse.search.IndexServiceBean; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; 
import edu.harvard.iq.dataverse.workflows.WorkflowComment; import java.util.Collections; import java.util.List; @@ -29,10 +32,10 @@ import jakarta.servlet.http.HttpServletRequest; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.*; -import static org.junit.jupiter.api.Assertions.*; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +@LocalJvmSettings public class ReturnDatasetToAuthorCommandTest { private Dataset dataset; @@ -142,6 +145,7 @@ public List getUsersWithPermissionOn(Permission permission, D } */ @Test + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "false", varArgs = "disable-return-to-author-reason") void testDatasetNull() { assertThrows(IllegalArgumentException.class, () -> new ReturnDatasetToAuthorCommand(dataverseRequest, null, "")); @@ -155,7 +159,7 @@ public void testReleasedDataset() { String actual = null; Dataset updatedDataset = null; try { - updatedDataset = testEngine.submit(new ReturnDatasetToAuthorCommand(dataverseRequest, dataset, "")); + updatedDataset = testEngine.submit(new ReturnDatasetToAuthorCommand(dataverseRequest, dataset, "Update Your Files, Dummy")); } catch (CommandException ex) { actual = ex.getMessage(); } @@ -171,36 +175,55 @@ public void testNotInReviewDataset() { String actual = null; Dataset updatedDataset = null; try { - updatedDataset = testEngine.submit(new ReturnDatasetToAuthorCommand(dataverseRequest, dataset, "")); + updatedDataset = testEngine.submit(new ReturnDatasetToAuthorCommand(dataverseRequest, dataset, "Update Your Files, Dummy")); } catch (CommandException ex) { actual = ex.getMessage(); } assertEquals(expected, actual); } - /* - FIXME - Empty Comments won't be allowed in future @Test - public void testEmptyComments(){ - - dataset.setIdentifier("DUMMY"); + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "false", varArgs = "disable-return-to-author-reason") + public void testEmptyOrNullComment(){ dataset.getLatestVersion().setVersionState(DatasetVersion.VersionState.DRAFT); - dataset.getLatestVersion().setInReview(true); - dataset.getLatestVersion().setReturnReason(null); + Dataset updatedDataset = null; String expected = "You must enter a reason for returning a dataset to the author(s)."; String actual = null; - Dataset updatedDataset = null; try { - - updatedDataset = testEngine.submit(new ReturnDatasetToAuthorCommand(dataverseRequest, dataset)); - } catch (CommandException ex) { + testEngine.submit( new AddLockCommand(dataverseRequest, dataset, + new DatasetLock(DatasetLock.Reason.InReview, dataverseRequest.getAuthenticatedUser()))); + + assertThrowsExactly(IllegalArgumentException.class, + () -> new ReturnDatasetToAuthorCommand(dataverseRequest, dataset, null), expected); + assertThrowsExactly(IllegalArgumentException.class, + () -> new ReturnDatasetToAuthorCommand(dataverseRequest, dataset, ""), expected); + updatedDataset = testEngine.submit(new ReturnDatasetToAuthorCommand(dataverseRequest, dataset, "")); + } catch (IllegalArgumentException | CommandException ex) { actual = ex.getMessage(); } - assertEquals(expected, actual); - - + assertEquals(expected, actual); } + + /** Test the disable reason flag + * @throws Exception when the test is in error. 
*/ + @Test + @JvmSetting(key = JvmSettings.FEATURE_FLAG, value = "true", varArgs = "disable-return-to-author-reason") + public void testEmptyOrNullCommentWhenDisabled() throws Exception { + dataset.getLatestVersion().setVersionState(DatasetVersion.VersionState.DRAFT); + Dataset updatedDataset = null; + + testEngine.submit(new AddLockCommand(dataverseRequest, dataset, + new DatasetLock(DatasetLock.Reason.InReview, dataverseRequest.getAuthenticatedUser()))); + + updatedDataset = testEngine.submit(new ReturnDatasetToAuthorCommand(dataverseRequest, dataset, null)); + assertNotNull(updatedDataset); + testEngine.submit(new AddLockCommand(dataverseRequest, dataset, + new DatasetLock(DatasetLock.Reason.InReview, dataverseRequest.getAuthenticatedUser()))); + updatedDataset = testEngine.submit(new ReturnDatasetToAuthorCommand(dataverseRequest, dataset, "")); + assertNotNull(updatedDataset); + } + @Test public void testAllGood() { diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetThumbnailCommandTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetThumbnailCommandTest.java index 34ea7810574..ead652ad774 100644 --- a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetThumbnailCommandTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetThumbnailCommandTest.java @@ -8,6 +8,7 @@ import edu.harvard.iq.dataverse.engine.TestCommandContext; import edu.harvard.iq.dataverse.engine.TestDataverseEngine; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; @@ -15,6 +16,8 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import java.util.List; + import static org.junit.jupiter.api.Assertions.*; public class UpdateDatasetThumbnailCommandTest { @@ -136,7 +139,7 @@ public void testSetDatasetFileAsThumbnailFileNull() { @Test public void testSetDatasetFileAsThumbnailFileNotFound() { - String expected = "Could not find file based on id supplied: 1."; + String expected = BundleUtil.getStringFromBundle("datasets.api.thumbnail.fileNotFound", List.of("1")); String actual = null; DatasetThumbnail datasetThumbnail = null; try { diff --git a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java index 76ca853d5cc..2da15147255 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java @@ -7,12 +7,13 @@ import com.google.gson.Gson; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.HandlenetServiceBean; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; import edu.harvard.iq.dataverse.export.openaire.OpenAireExportUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; + import java.io.IOException; import java.io.StringWriter; import java.nio.charset.StandardCharsets; @@ -56,7 +57,7 @@ public void testWriteIdentifierElementDoi() throws XMLStreamException { String persistentAgency = "doi"; String persistentAuthority = "10.123"; String persistentId = "123"; - 
GlobalId globalId = new GlobalId(persistentAgency, persistentAuthority, persistentId, null, DOIServiceBean.DOI_RESOLVER_URL, null); + GlobalId globalId = new GlobalId(persistentAgency, persistentAuthority, persistentId, null, AbstractDOIProvider.DOI_RESOLVER_URL, null); // when OpenAireExportUtil.writeIdentifierElement(xmlWriter, globalId.asURL(), null); @@ -76,7 +77,7 @@ public void testWriteIdentifierElementHandle() throws XMLStreamException { String persistentAgency = "hdl"; String persistentAuthority = "1902.1"; String persistentId = "111012"; - GlobalId globalId = new GlobalId(persistentAgency, persistentAuthority, persistentId, null, HandlenetServiceBean.HDL_RESOLVER_URL, null); + GlobalId globalId = new GlobalId(persistentAgency, persistentAuthority, persistentId, null, HandlePidProvider.HDL_RESOLVER_URL, null); // when OpenAireExportUtil.writeIdentifierElement(xmlWriter, globalId.asURL(), null); @@ -304,8 +305,10 @@ public void testSubjectsElement() throws XMLStreamException, IOException { + "Engineering" + "Law" + "KeywordTerm1" + "KeywordTerm2" + "", stringWriter.toString()); @@ -927,15 +930,15 @@ public void testWriteGeoLocationElement() throws XMLStreamException, IOException + "" + "10" + "20" - + "30" + "40" + + "30" + "" + "" + "" + "" + + "60" + "80" + "70" - + "60" + "50" + "" + "", @@ -966,8 +969,8 @@ public void testWriteGeoLocationElement2() throws XMLStreamException, IOExceptio + "" + "23" + "786" - + "45" + "34" + + "45" + "" + "", stringWriter.toString()); diff --git a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java index d600ccac53c..2139589b4c3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/SchemaDotOrgExporterTest.java @@ -322,6 +322,7 @@ private static void mockDatasetFieldSvc() { DatasetFieldType keywordType = datasetFieldTypeSvc.add(new DatasetFieldType("keyword", DatasetFieldType.FieldType.TEXT, true)); Set keywordChildTypes = new HashSet<>(); keywordChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("keywordValue", DatasetFieldType.FieldType.TEXT, false))); + keywordChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("keywordTermURI", DatasetFieldType.FieldType.TEXT, false))); keywordChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("keywordVocabulary", DatasetFieldType.FieldType.TEXT, false))); keywordChildTypes.add(datasetFieldTypeSvc.add(new DatasetFieldType("keywordVocabularyURI", DatasetFieldType.FieldType.TEXT, false))); keywordType.setChildDatasetFieldTypes(keywordChildTypes); diff --git a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt index c9e429729df..431f069cb03 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt +++ b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt @@ -271,6 +271,12 @@ "typeClass": "primitive", "value": "KeywordTerm1" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -291,6 +297,12 @@ "typeClass": "primitive", "value": "KeywordTerm2" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI2.org" + }, 
"keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -893,14 +905,14 @@ "typeClass": "primitive", "value": "20" }, - "northLongitude": { - "typeName": "northLongitude", + "northLatitude": { + "typeName": "northLatitude", "multiple": false, "typeClass": "primitive", "value": "30" }, - "southLongitude": { - "typeName": "southLongitude", + "southLatitude": { + "typeName": "southLatitude", "multiple": false, "typeClass": "primitive", "value": "40" @@ -919,14 +931,14 @@ "typeClass": "primitive", "value": "60" }, - "northLongitude": { - "typeName": "northLongitude", + "northLatitude": { + "typeName": "northLatitude", "multiple": false, "typeClass": "primitive", "value": "70" }, - "southLongitude": { - "typeName": "southLongitude", + "southLatitude": { + "typeName": "southLatitude", "multiple": false, "typeClass": "primitive", "value": "80" diff --git a/src/test/java/edu/harvard/iq/dataverse/export/dataset-organizations-comma.txt b/src/test/java/edu/harvard/iq/dataverse/export/dataset-organizations-comma.txt index a8b209d379c..dc9ea198f62 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/dataset-organizations-comma.txt +++ b/src/test/java/edu/harvard/iq/dataverse/export/dataset-organizations-comma.txt @@ -260,16 +260,16 @@ "typeName": "eastLongitude", "value": "23" }, - "northLongitude": { + "northLatitude": { "multiple": false, "typeClass": "primitive", - "typeName": "northLongitude", + "typeName": "northLatitude", "value": "786" }, - "southLongitude": { + "southLatitude": { "multiple": false, "typeClass": "primitive", - "typeName": "southLongitude", + "typeName": "southLatitude", "value": "34" }, "westLongitude": { diff --git a/src/test/java/edu/harvard/iq/dataverse/export/dataset-organizations.txt b/src/test/java/edu/harvard/iq/dataverse/export/dataset-organizations.txt index b852e79c26a..5601cd64161 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/dataset-organizations.txt +++ b/src/test/java/edu/harvard/iq/dataverse/export/dataset-organizations.txt @@ -292,16 +292,16 @@ "typeName": "eastLongitude", "value": "23" }, - "northLongitude": { + "northLatitude": { "multiple": false, "typeClass": "primitive", - "typeName": "northLongitude", + "typeName": "northLatitude", "value": "786" }, - "southLongitude": { + "southLatitude": { "multiple": false, "typeClass": "primitive", - "typeName": "southLongitude", + "typeName": "southLatitude", "value": "34" }, "westLongitude": { diff --git a/src/test/java/edu/harvard/iq/dataverse/export/dataset-simplified.txt b/src/test/java/edu/harvard/iq/dataverse/export/dataset-simplified.txt index a91099197b9..9bada051c61 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/dataset-simplified.txt +++ b/src/test/java/edu/harvard/iq/dataverse/export/dataset-simplified.txt @@ -306,16 +306,16 @@ "typeName": "eastLongitude", "value": "23" }, - "northLongitude": { + "northLatitude": { "multiple": false, "typeClass": "primitive", - "typeName": "northLongitude", + "typeName": "northLatitude", "value": "786" }, - "southLongitude": { + "southLatitude": { "multiple": false, "typeClass": "primitive", - "typeName": "southLongitude", + "typeName": "southLatitude", "value": "34" }, "westLongitude": { diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json index bdff949bb36..9cf04bd0e05 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json +++ 
b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-create-new-all-ddi-fields.json @@ -230,6 +230,12 @@ "typeClass": "primitive", "value": "KeywordTerm1" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -250,6 +256,12 @@ "typeClass": "primitive", "value": "KeywordTerm2" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI2.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -866,14 +878,14 @@ "typeClass": "primitive", "value": "-70" }, - "northLongitude": { - "typeName": "northLongitude", + "northLatitude": { + "typeName": "northLatitude", "multiple": false, "typeClass": "primitive", "value": "43" }, - "southLongitude": { - "typeName": "southLongitude", + "southLatitude": { + "typeName": "southLatitude", "multiple": false, "typeClass": "primitive", "value": "42" @@ -892,14 +904,14 @@ "typeClass": "primitive", "value": "-13" }, - "northLongitude": { - "typeName": "northLongitude", + "northLatitude": { + "typeName": "northLatitude", "multiple": false, "typeClass": "primitive", "value": "29" }, - "southLongitude": { - "typeName": "southLongitude", + "southLatitude": { + "typeName": "southLatitude", "multiple": false, "typeClass": "primitive", "value": "28" diff --git a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json index 9bdc7e45349..2d4ca078962 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json +++ b/src/test/java/edu/harvard/iq/dataverse/export/ddi/dataset-finch1.json @@ -238,6 +238,12 @@ "typeClass": "primitive", "value": "Keyword Value 1" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -258,6 +264,12 @@ "typeClass": "primitive", "value": "Keyword Value Two" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -371,14 +383,14 @@ "typeClass": "primitive", "value": "59.8" }, - "southLongitude": { - "typeName": "southLongitude", + "southLatitude": { + "typeName": "southLatitude", "multiple": false, "typeClass": "primitive", "value": "41.6" }, - "northLongitude": { - "typeName": "northLongitude", + "northLatitude": { + "typeName": "northLatitude", "multiple": false, "typeClass": "primitive", "value": "43.8" diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java index 21bb6633204..639a7c542c4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolHandlerTest.java @@ -1,6 +1,5 @@ package edu.harvard.iq.dataverse.externaltools; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.Dataset; @@ -9,7 +8,9 @@ import edu.harvard.iq.dataverse.GlobalId; import 
edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.testing.JvmSetting; import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; @@ -53,7 +54,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { Exception expectedException1 = null; String nullLocaleCode = null; try { - ExternalToolHandler externalToolHandler1 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); + URLTokenUtil externalToolHandler1 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); } catch (Exception ex) { expectedException1 = ex; } @@ -71,7 +72,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { DataFile dataFile = new DataFile(); dataFile.setId(42l); try { - ExternalToolHandler externalToolHandler1 = new ExternalToolHandler(externalTool, dataFile, nullApiToken, nullFileMetadata, nullLocaleCode); + URLTokenUtil externalToolHandler1 = new ExternalToolHandler(externalTool, dataFile, nullApiToken, nullFileMetadata, nullLocaleCode); } catch (Exception ex) { expectedException1 = ex; } @@ -92,7 +93,7 @@ public void testGetToolUrlWithOptionalQueryParameters() { .build().toString()); Exception expectedException2 = null; try { - ExternalToolHandler externalToolHandler2 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); + URLTokenUtil externalToolHandler2 = new ExternalToolHandler(externalTool, nullDataFile, nullApiToken, nullFileMetadata, nullLocaleCode); } catch (Exception ex) { expectedException2 = ex; } @@ -225,10 +226,10 @@ public void testGetToolUrlWithAllowedApiCalls() { assertTrue(et != null); System.out.println("allowedApiCalls et created"); System.out.println(et.getAllowedApiCalls()); - ExternalToolHandler externalToolHandler = new ExternalToolHandler(et, ds, at, null); + URLTokenUtil externalToolHandler = new ExternalToolHandler(et, ds, at, null); System.out.println("allowedApiCalls eth created"); JsonObject jo = externalToolHandler - .createPostBody(externalToolHandler.getParams(JsonUtil.getJsonObject(et.getToolParameters()))).build(); + .createPostBody(externalToolHandler.getParams(JsonUtil.getJsonObject(et.getToolParameters())), JsonUtil.getJsonArray(et.getAllowedApiCalls())).build(); assertEquals(1, jo.getJsonObject("queryParameters").getInt("datasetId")); String signedUrl = jo.getJsonArray("signedUrls").getJsonObject(0).getString("signedUrl"); // The date and token will change each time but check for the constant parts of @@ -266,7 +267,7 @@ public void testDatasetConfigureTool() { .build().toString()); var dataset = new Dataset(); - dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, "10.5072", "ABC123", null, DOIServiceBean.DOI_RESOLVER_URL, null)); + dataset.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL, "10.5072", "ABC123", null, AbstractDOIProvider.DOI_RESOLVER_URL, null)); ApiToken nullApiToken = null; String nullLocaleCode = "en"; var externalToolHandler = new ExternalToolHandler(externalTool, dataset, nullApiToken, nullLocaleCode); diff --git a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java 
b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java index 9337949f605..d2a68ea8da6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/externaltools/ExternalToolServiceBeanTest.java @@ -1,6 +1,5 @@ package edu.harvard.iq.dataverse.externaltools; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataFileServiceBean; import edu.harvard.iq.dataverse.DataTable; @@ -9,6 +8,10 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.util.URLTokenUtil; + import java.util.ArrayList; import java.util.List; import jakarta.json.Json; @@ -29,6 +32,7 @@ public ExternalToolServiceBeanTest() { public void testfindAll() { DataFile dataFile = new DataFile(); dataFile.setId(42l); + dataFile.setStorageIdentifier("test://18debaa2d7c-db98ef7d9a77"); FileMetadata fmd = new FileMetadata(); fmd.setId(2L); DatasetVersion dv = new DatasetVersion(); @@ -49,7 +53,7 @@ public void testfindAll() { externalToolTypes.add(externalToolType); ExternalTool.Scope scope = ExternalTool.Scope.FILE; ExternalTool externalTool = new ExternalTool("displayName", "toolName", "description", externalToolTypes, scope, "http://foo.com", "{}", DataFileServiceBean.MIME_TYPE_TSV_ALT); - ExternalToolHandler externalToolHandler4 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, null); + URLTokenUtil externalToolHandler4 = new ExternalToolHandler(externalTool, dataFile, apiToken, fmd, null); List externalTools = new ArrayList<>(); externalTools.add(externalTool); List availableExternalTools = externalToolService.findExternalToolsByFile(externalTools, dataFile); @@ -142,7 +146,7 @@ public void testParseAddFileToolFilePid() { assertEquals("explorer", externalTool.getToolName()); DataFile dataFile = new DataFile(); dataFile.setId(42l); - dataFile.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL,"10.5072","FK2/RMQT6J/G9F1A1", "/", DOIServiceBean.DOI_RESOLVER_URL, null)); + dataFile.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL,"10.5072","FK2/RMQT6J/G9F1A1", "/", AbstractDOIProvider.DOI_RESOLVER_URL, null)); FileMetadata fmd = new FileMetadata(); fmd.setId(2L); DatasetVersion dv = new DatasetVersion(); diff --git a/src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java new file mode 100644 index 00000000000..095e798f229 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/globus/GlobusUtilTest.java @@ -0,0 +1,88 @@ +package edu.harvard.iq.dataverse.globus; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.mock; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.Mockito; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.dataaccess.AbstractRemoteOverlayAccessIO; +import edu.harvard.iq.dataverse.dataaccess.DataAccess; +import 
edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore;
+import edu.harvard.iq.dataverse.mocks.MocksFactory;
+import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider;
+import edu.harvard.iq.dataverse.util.json.JsonUtil;
+import jakarta.json.JsonObject;
+
+public class GlobusUtilTest {
+
+    private Dataset dataset;
+    private DataFile mDatafile;
+    private DataFile rDatafile;
+    private String baseStoreId1 = "182ad2bda2f-c3508e719076";
+    private String baseStoreId2 = "182ad2bda2f-c3508e719077";
+    private String logoPath = "d7c42580-6538-4605-9ad8-116a61982644/hdc1/image002.mrc";
+    private String authority = "10.5072";
+    private String identifier = "F2ABCDEF";
+
+    @BeforeEach
+    public void setUp() {
+
+        // Managed Globus Store
+
+        // Nonsense endpoint/paths
+        System.setProperty("dataverse.files.globusm." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH,
+                "d7c42580-6538-4605-9ad8-116a61982644/hdc1");
+        System.setProperty("dataverse.files.globusm.managed", "true");
+
+        // Remote Store
+        System.setProperty("dataverse.files.globusr.managed", "false");
+        System.setProperty(
+                "dataverse.files.globusr." + AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS,
+                "d7c42580-6538-4605-9ad8-116a61982644/hdc1");
+
+        dataset = MocksFactory.makeDataset();
+        dataset.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL, authority, identifier, "/",
+                AbstractDOIProvider.DOI_RESOLVER_URL, null));
+        mDatafile = MocksFactory.makeDataFile();
+        mDatafile.setOwner(dataset);
+        mDatafile.setStorageIdentifier("globusm://" + baseStoreId1);
+
+        rDatafile = MocksFactory.makeDataFile();
+        rDatafile.setOwner(dataset);
+        rDatafile.setStorageIdentifier("globusr://" + baseStoreId2 + "//" + logoPath);
+        List<DataFile> files = new ArrayList<DataFile>();
+        files.add(mDatafile);
+        files.add(rDatafile);
+        dataset.setFiles(files);
+    }
+
+    @AfterEach
+    public void tearDown() {
+        System.clearProperty("dataverse.files.globusm." + GlobusAccessibleStore.TRANSFER_ENDPOINT_WITH_BASEPATH);
+        System.clearProperty("dataverse.files.globusm.managed");
+        System.clearProperty("dataverse.files.globusr.managed");
+        System.clearProperty(
+                "dataverse.files.globusr." 
+ AbstractRemoteOverlayAccessIO.REFERENCE_ENDPOINTS_WITH_BASEPATHS); + } + + + @Test + public void testgetFilesMap() { + + JsonObject jo = GlobusUtil.getFilesMap(dataset.getFiles(), dataset); + System.out.println(JsonUtil.prettyPrint(jo)); + assertEquals(jo.getString(Long.toString(mDatafile.getId())), "d7c42580-6538-4605-9ad8-116a61982644/hdc1/10.5072/F2ABCDEF/182ad2bda2f-c3508e719076"); + assertEquals(jo.getString(Long.toString(rDatafile.getId())), logoPath); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java index 96e314324ab..ca64bcc794f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/IngestFrequencyTest.java @@ -99,7 +99,7 @@ private DataFile readFileCalcFreq(String fileName, String type ) { TabularDataIngest tabDataIngest = null; try { - tabDataIngest = ingestPlugin.read(fileInputStream, null); + tabDataIngest = ingestPlugin.read(fileInputStream, false, null); } catch (IOException ingestEx) { tabDataIngest = null; System.out.println("Caught an exception trying to ingest file " + fileName + ": " + ingestEx.getLocalizedMessage()); diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java index fc066ef195e..9afb35918a4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/CSVFileReaderTest.java @@ -52,7 +52,7 @@ public void testRead() { try (BufferedInputStream stream = new BufferedInputStream( new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - File outFile = instance.read(stream, null).getTabDelimitedFile(); + File outFile = instance.read(stream, false, null).getTabDelimitedFile(); result = new BufferedReader(new FileReader(outFile)); logger.fine("Final pass: " + outFile.getPath()); } catch (IOException ex) { @@ -104,7 +104,7 @@ public void testVariables() { try (BufferedInputStream stream = new BufferedInputStream( new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - result = instance.read(stream, null).getDataTable(); + result = instance.read(stream, false, null).getDataTable(); } catch (IOException ex) { fail("" + ex); } @@ -154,7 +154,7 @@ public void testSubset() { new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - ingestResult = instance.read(stream, null); + ingestResult = instance.read(stream, false, null); generatedTabFile = ingestResult.getTabDelimitedFile(); generatedDataTable = ingestResult.getDataTable(); @@ -195,7 +195,7 @@ public void testSubset() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); assertArrayEquals(floatVectors[vectorCount++], columnVector, "column " + i + ":"); } @@ -229,7 +229,7 @@ public void testSubset() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - 
Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); assertArrayEquals(longVectors[vectorCount++], columnVector, "column " + i + ":"); } @@ -256,7 +256,7 @@ public void testSubset() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); assertArrayEquals(stringVectors[vectorCount++], columnVector, "column " + i + ":"); } @@ -298,7 +298,7 @@ public void testVariableUNFs() { new FileInputStream(testFile))) { CSVFileReader instance = new CSVFileReader(new CSVFileReaderSpi(), ','); - ingestResult = instance.read(stream, null); + ingestResult = instance.read(stream, false, null); generatedTabFile = ingestResult.getTabDelimitedFile(); generatedDataTable = ingestResult.getDataTable(); @@ -327,7 +327,7 @@ public void testVariableUNFs() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Double[] columnVector = TabularSubsetGenerator.subsetDoubleVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); try { unf = UNFUtil.calculateUNF(columnVector); } catch (IOException | UnfException ioex) { @@ -345,7 +345,7 @@ public void testVariableUNFs() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + Long[] columnVector = TabularSubsetGenerator.subsetLongVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); try { unf = UNFUtil.calculateUNF(columnVector); @@ -363,7 +363,7 @@ public void testVariableUNFs() { fail("Failed to open generated tab-delimited file for reading" + ioex); } - String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue()); + String[] columnVector = TabularSubsetGenerator.subsetStringVector(generatedTabInputStream, i, generatedDataTable.getCaseQuantity().intValue(), false); String[] dateFormats = null; @@ -401,7 +401,7 @@ public void testVariableUNFs() { public void testBrokenCSV() { String brokenFile = "src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/csv/BrokenCSV.csv"; try { - new CSVFileReader(new CSVFileReaderSpi(), ',').read(null, null); + new CSVFileReader(new CSVFileReaderSpi(), ',').read(null, false, null); fail("IOException not thrown on null csv"); } catch (NullPointerException ex) { String expMessage = null; @@ -412,7 +412,7 @@ public void testBrokenCSV() { } try (BufferedInputStream stream = new BufferedInputStream( new FileInputStream(brokenFile))) { - new CSVFileReader(new CSVFileReaderSpi(), ',').read(stream, null); + new CSVFileReader(new CSVFileReaderSpi(), ',').read(stream, false, null); fail("IOException was not thrown when collumns do not align."); } catch (IOException ex) { String expMessage = 
BundleUtil.getStringFromBundle("ingest.csv.recordMismatch", diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java index 113e9be6b54..8af36d6466d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/DTAFileReaderTest.java @@ -16,7 +16,7 @@ public class DTAFileReaderTest { @Test public void testOs() throws IOException { - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/50by1000.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/50by1000.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("rel_8_or_9", result.getDataTable().getOriginalFormatVersion()); assertEquals(50, result.getDataTable().getDataVariables().size()); diff --git a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java index c963346b05e..0f14054f472 100644 --- a/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/ingest/tabulardata/impl/plugins/dta/NewDTAFileReaderTest.java @@ -25,7 +25,7 @@ public void testAuto() throws IOException { instance = new NewDTAFileReader(null, 117); // From https://www.stata-press.com/data/r13/auto.dta // `strings` shows "
      117" - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/stata13-auto.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("scripts/search/data/tabular/stata13-auto.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 13", result.getDataTable().getOriginalFormatVersion()); assertEquals(12, result.getDataTable().getDataVariables().size()); @@ -39,7 +39,7 @@ public void testAuto() throws IOException { @Test public void testStrl() throws IOException { instance = new NewDTAFileReader(null, 118); - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "strl.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "strl.dta"))), false, nullDataFile); DataTable table = result.getDataTable(); assertEquals("application/x-stata", table.getOriginalFileFormat()); assertEquals("STATA 14", table.getOriginalFormatVersion()); @@ -58,7 +58,7 @@ public void testStrl() throws IOException { @Test public void testDates() throws IOException { instance = new NewDTAFileReader(null, 118); - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "dates.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File(base + "dates.dta"))), false, nullDataFile); DataTable table = result.getDataTable(); assertEquals("application/x-stata", table.getOriginalFileFormat()); assertEquals("STATA 14", table.getOriginalFormatVersion()); @@ -77,7 +77,7 @@ public void testDates() throws IOException { @Test void testNull() { instance = new NewDTAFileReader(null, 117); - assertThrows(IOException.class, () -> instance.read(null, new File(""))); + assertThrows(IOException.class, () -> instance.read(null, false, new File(""))); } // TODO: Can we create a small file to check into the code base that exercises the value-label names non-zero offset issue? 
@@ -87,7 +87,7 @@ public void testFirstCategoryNonZeroOffset() throws IOException { instance = new NewDTAFileReader(null, 117); // https://dataverse.harvard.edu/file.xhtml?fileId=2865667 Stata 13 HouseImputingCivilRightsInfo.dta md5=7dd144f27cdb9f8d1c3f4eb9c4744c42 - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/HouseImputingCivilRightsInfo.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/HouseImputingCivilRightsInfo.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 13", result.getDataTable().getOriginalFormatVersion()); assertEquals(5, result.getDataTable().getDataVariables().size()); @@ -107,7 +107,7 @@ public void testFirstCategoryNonZeroOffset() throws IOException { public void testFirstCategoryNonZeroOffset1() throws IOException { instance = new NewDTAFileReader(null, 118); // https://dataverse.harvard.edu/file.xhtml?fileId=3140457 Stata 14: 2018_04_06_Aggregated_dataset_v2.dta - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/2018_04_06_Aggregated_dataset_v2.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/2018_04_06_Aggregated_dataset_v2.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 14", result.getDataTable().getOriginalFormatVersion()); assertEquals(227, result.getDataTable().getDataVariables().size()); @@ -136,7 +136,7 @@ public void test33k() throws IOException { @Test public void testCharacteristics() throws IOException { instance = new NewDTAFileReader(null, 117); - TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/15aa6802ee5-5d2ed1bf55a5.dta"))), nullDataFile); + TabularDataIngest result = instance.read(new BufferedInputStream(new FileInputStream(new File("/tmp/15aa6802ee5-5d2ed1bf55a5.dta"))), false, nullDataFile); assertEquals("application/x-stata", result.getDataTable().getOriginalFileFormat()); assertEquals("STATA 13", result.getDataTable().getOriginalFormatVersion()); assertEquals(441, result.getDataTable().getDataVariables().size()); diff --git a/src/test/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountLoggingServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountLoggingServiceBeanTest.java index c1051a57db8..2a673ee4e79 100644 --- a/src/test/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountLoggingServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountLoggingServiceBeanTest.java @@ -21,7 +21,6 @@ import static org.hamcrest.CoreMatchers.not; import static org.hamcrest.MatcherAssert.assertThat; -import org.hamcrest.MatcherAssert; import org.junit.jupiter.api.Test; /** @@ -45,8 +44,8 @@ public void testMainAndFileConstructor() { GlobalId id = dataset.getGlobalId(); dataset.setGlobalId(id); dvVersion.setDataset(dataset); - dvVersion.setAuthorsStr("OneAuthor;TwoAuthor"); - dvVersion.setTitle("Title"); + dvVersion.setAuthorsStr("OneAuthor;\tTwoAuthor"); + dvVersion.setTitle("Title\tWith Tab"); dvVersion.setVersionNumber(1L); dvVersion.setReleaseTime(new Date()); @@ -64,7 +63,13 @@ public void testMainAndFileConstructor() { //lastly setting attributes we don't actually use currently in our logging/constructors, 
 
         //And test. "-" is the default
         assertThat(entry.getEventTime(), is(not("-")));
diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java
index dabc7f68fce..cffac741c78 100644
--- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java
@@ -1,18 +1,43 @@
 package edu.harvard.iq.dataverse.pidproviders;
 
-import edu.harvard.iq.dataverse.DOIServiceBean;
+import edu.harvard.iq.dataverse.Dataset;
+import edu.harvard.iq.dataverse.Dataverse;
 import edu.harvard.iq.dataverse.GlobalId;
-import edu.harvard.iq.dataverse.GlobalIdServiceBean;
+import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider;
+import edu.harvard.iq.dataverse.pidproviders.doi.UnmanagedDOIProvider;
+import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DataCiteDOIProvider;
+import edu.harvard.iq.dataverse.pidproviders.doi.datacite.DataCiteProviderFactory;
+import edu.harvard.iq.dataverse.pidproviders.doi.ezid.EZIdDOIProvider;
+import edu.harvard.iq.dataverse.pidproviders.doi.ezid.EZIdProviderFactory;
+import edu.harvard.iq.dataverse.pidproviders.doi.fake.FakeDOIProvider;
+import edu.harvard.iq.dataverse.pidproviders.doi.fake.FakeProviderFactory;
+import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider;
+import edu.harvard.iq.dataverse.pidproviders.handle.HandleProviderFactory;
+import edu.harvard.iq.dataverse.pidproviders.handle.UnmanagedHandlePidProvider;
+import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider;
+import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkProviderFactory;
+import edu.harvard.iq.dataverse.pidproviders.perma.UnmanagedPermaLinkPidProvider;
+import edu.harvard.iq.dataverse.settings.JvmSettings;
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
+import edu.harvard.iq.dataverse.util.SystemConfig;
 import edu.harvard.iq.dataverse.util.json.JsonUtil;
+import edu.harvard.iq.dataverse.util.testing.JvmSetting;
+import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings;
+
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
+
+import jakarta.json.Json;
 import jakarta.json.JsonObjectBuilder;
 import jakarta.ws.rs.NotFoundException;
 
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.extension.ExtendWith;
@@ -24,32 +49,128 @@
 
 import static org.junit.jupiter.api.Assertions.*;
 
-/**
- * Useful for testing but requires DataCite credentials, etc.
- */
+
 @ExtendWith(MockitoExtension.class)
+@LocalJvmSettings
+//Perma 1
+@JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "perma 1", varArgs = "perma1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = PermaLinkPidProvider.TYPE, varArgs = "perma1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "DANSLINK", varArgs = "perma1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "QE", varArgs = "perma1")
+@JvmSetting(key = JvmSettings.PERMALINK_SEPARATOR, value = "-", varArgs = "perma1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_EXCLUDED_LIST, value = "perma:DANSLINKQE123456, perma:bad, perma:LINKIT123456", varArgs = "perma1")
+
+//Perma 2
+@JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "perma 2", varArgs = "perma2")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = PermaLinkPidProvider.TYPE, varArgs = "perma2")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "DANSLINK", varArgs = "perma2")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "QE", varArgs = "perma2")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_MANAGED_LIST, value = "perma:LINKIT/FK2ABCDEF", varArgs = "perma2")
+@JvmSetting(key = JvmSettings.PERMALINK_SEPARATOR, value = "/", varArgs = "perma2")
+@JvmSetting(key = JvmSettings.PERMALINK_BASE_URL, value = "https://example.org/123", varArgs = "perma2")
+// Datacite 1
+@JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "dataCite 1", varArgs = "dc1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = DataCiteDOIProvider.TYPE, varArgs = "dc1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "10.5073", varArgs = "dc1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "FK2", varArgs = "dc1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_EXCLUDED_LIST, value = "doi:10.5073/FK2123456", varArgs = "dc1")
+@JvmSetting(key = JvmSettings.DATACITE_MDS_API_URL, value = "https://mds.test.datacite.org/", varArgs = "dc1")
+@JvmSetting(key = JvmSettings.DATACITE_REST_API_URL, value = "https://api.test.datacite.org", varArgs = "dc1")
+@JvmSetting(key = JvmSettings.DATACITE_USERNAME, value = "test", varArgs = "dc1")
+@JvmSetting(key = JvmSettings.DATACITE_PASSWORD, value = "changeme", varArgs = "dc1")
+//Datacite 2
+@JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "dataCite 2", varArgs = "dc2")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = DataCiteDOIProvider.TYPE, varArgs = "dc2")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "10.5072", varArgs = "dc2")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "FK3", varArgs = "dc2")
+@JvmSetting(key = JvmSettings.DATACITE_MDS_API_URL, value = "https://mds.test.datacite.org/", varArgs = "dc2")
+@JvmSetting(key = JvmSettings.DATACITE_REST_API_URL, value = "https://api.test.datacite.org", varArgs = "dc2")
+@JvmSetting(key = JvmSettings.DATACITE_USERNAME, value = "test2", varArgs = "dc2")
+@JvmSetting(key = JvmSettings.DATACITE_PASSWORD, value = "changeme2", varArgs = "dc2")
+//EZID 1
+@JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "EZId 1", varArgs = "ez1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = EZIdDOIProvider.TYPE, varArgs = "ez1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "10.5072", varArgs = "ez1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "FK2", varArgs = "ez1")
+@JvmSetting(key = JvmSettings.EZID_API_URL, value = "https://ezid.cdlib.org/", varArgs = "ez1")
+@JvmSetting(key = JvmSettings.EZID_USERNAME, value = "apitest", varArgs = "ez1")
+@JvmSetting(key = JvmSettings.EZID_PASSWORD, value = "apitest", varArgs = "ez1")
+//FAKE 1
+@JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "FAKE 1", varArgs = "fake1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = FakeDOIProvider.TYPE, varArgs = "fake1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "10.5074", varArgs = "fake1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "FK", varArgs = "fake1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_MANAGED_LIST, value = "doi:10.5073/FK3ABCDEF", varArgs = "fake1")
+
+//HANDLE 1
+@JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "HDL 1", varArgs = "hdl1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = HandlePidProvider.TYPE, varArgs = "hdl1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "20.500.1234", varArgs = "hdl1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "", varArgs = "hdl1")
+@JvmSetting(key = JvmSettings.PID_PROVIDER_MANAGED_LIST, value = "hdl:20.20.20/FK2ABCDEF", varArgs = "hdl1")
+@JvmSetting(key = JvmSettings.HANDLENET_AUTH_HANDLE, value = "20.500.1234/ADMIN", varArgs = "hdl1")
+@JvmSetting(key = JvmSettings.HANDLENET_INDEPENDENT_SERVICE, value = "true", varArgs = "hdl1")
+@JvmSetting(key = JvmSettings.HANDLENET_INDEX, value = "1", varArgs = "hdl1")
+@JvmSetting(key = JvmSettings.HANDLENET_KEY_PASSPHRASE, value = "passphrase", varArgs = "hdl1")
+@JvmSetting(key = JvmSettings.HANDLENET_KEY_PATH, value = "/tmp/cred", varArgs = "hdl1")
+
+//List to instantiate
+@JvmSetting(key = JvmSettings.PID_PROVIDERS, value = "perma1, perma2, dc1, dc2, ez1, fake1, hdl1")
+
 public class PidUtilTest {
+
     @Mock
     private SettingsServiceBean settingsServiceBean;
 
-    @InjectMocks
-    private PermaLinkPidProviderServiceBean p = new PermaLinkPidProviderServiceBean();
-
+    @BeforeAll
+    //FWIW @JvmSetting doesn't appear to work with @BeforeAll
+    public static void setUpClass() throws Exception {
+
+        //This mimics the initial config in the PidProviderFactoryBean.loadProviderFactories method - could potentially be used to mock that bean at some point
+        Map<String, PidProviderFactory> pidProviderFactoryMap = new HashMap<>();
+        pidProviderFactoryMap.put(PermaLinkPidProvider.TYPE, new PermaLinkProviderFactory());
+        pidProviderFactoryMap.put(DataCiteDOIProvider.TYPE, new DataCiteProviderFactory());
+        pidProviderFactoryMap.put(HandlePidProvider.TYPE, new HandleProviderFactory());
+        pidProviderFactoryMap.put(FakeDOIProvider.TYPE, new FakeProviderFactory());
+        pidProviderFactoryMap.put(EZIdDOIProvider.TYPE, new EZIdProviderFactory());
+
+        PidUtil.clearPidProviders();
+
+        //Read list of providers to add
+        List<String> providers = Arrays.asList(JvmSettings.PID_PROVIDERS.lookup().split(",\\s"));
+        //Iterate through the list of providers and add them using the PidProviderFactory of the appropriate type
+        for (String providerId : providers) {
+            System.out.println("Loading provider: " + providerId);
+            String type = JvmSettings.PID_PROVIDER_TYPE.lookup(providerId);
+            PidProviderFactory factory = pidProviderFactoryMap.get(type);
+            PidUtil.addToProviderList(factory.createPidProvider(providerId));
+        }
+        PidUtil.addAllToUnmanagedProviderList(Arrays.asList(new UnmanagedDOIProvider(),
+                new UnmanagedHandlePidProvider(), new UnmanagedPermaLinkPidProvider()));
+    }
+
+    @AfterAll
+    public static void tearDownClass() throws Exception {
+        PidUtil.clearPidProviders();
+    }
+
     @BeforeEach
     public void initMocks() {
         MockitoAnnotations.initMocks(this);
-        Mockito.when(settingsServiceBean.getValueForKey(SettingsServiceBean.Key.Protocol)).thenReturn("perma");
-        Mockito.when(settingsServiceBean.getValueForKey(SettingsServiceBean.Key.Authority)).thenReturn("DANSLINK");
-        p.reInit();
+//        Mockito.when(settingsServiceBean.getValueForKey(SettingsServiceBean.Key.Protocol)).thenReturn("perma");
+//        Mockito.when(settingsServiceBean.getValueForKey(SettingsServiceBean.Key.Authority)).thenReturn("DANSLINK");
     }
 
+    /**
+     * Useful for testing but requires DataCite credentials, etc.
+     */
     @Disabled
     @Test
     public void testGetDoi() throws IOException {
         String username = System.getenv("DataCiteUsername");
         String password = System.getenv("DataCitePassword");
         String baseUrl = "https://api.test.datacite.org";
-        GlobalId pid = new GlobalId(DOIServiceBean.DOI_PROTOCOL,"10.70122","QE5A-XN55", "/", DOIServiceBean.DOI_RESOLVER_URL, null);
+        GlobalId pid = new GlobalId(AbstractDOIProvider.DOI_PROTOCOL,"10.70122","QE5A-XN55", "/", AbstractDOIProvider.DOI_RESOLVER_URL, null);
         try {
             JsonObjectBuilder result = PidUtil.queryDoi(pid, baseUrl, username, password);
             String out = JsonUtil.prettyPrint(result.build());
@@ -58,23 +179,299 @@ public void testGetDoi() throws IOException {
             System.out.println("ex: " + ex);
         }
     }
+
+
+    @Test
+    public void testFactories() throws IOException {
+        PidProvider p = PidUtil.getPidProvider("perma1");
+        assertEquals("perma 1", p.getLabel());
+        assertEquals(PermaLinkPidProvider.PERMA_PROTOCOL, p.getProtocol());
+        assertEquals("DANSLINK", p.getAuthority());
+        assertEquals("QE", p.getShoulder());
+        assertEquals("-", p.getSeparator());
+        assertTrue(p.getUrlPrefix().startsWith(SystemConfig.getDataverseSiteUrlStatic()));
+        p = PidUtil.getPidProvider("perma2");
+        assertTrue(p.getUrlPrefix().startsWith("https://example.org/123"));
+        p = PidUtil.getPidProvider("dc2");
+        assertEquals("FK3", p.getShoulder());
+
+    }
 
     @Test
-    public void testGetPermaLink() throws IOException {
-        List<GlobalIdServiceBean> list = new ArrayList<>();
+    public void testPermaLinkParsing() throws IOException {
+        //Verify that we can parse a valid perma link associated with perma1
+        String pid1String = "perma:DANSLINK-QE-5A-XN55";
+        GlobalId pid2 = PidUtil.parseAsGlobalID(pid1String);
+        assertEquals(pid1String, pid2.asString());
+        //Check that it was parsed by perma1 and that the URL is correct, etc
+        assertEquals("perma1", pid2.getProviderId());
+        assertEquals(SystemConfig.getDataverseSiteUrlStatic() + "/citation?persistentId=" + pid1String, pid2.asURL());
+        assertEquals("DANSLINK", pid2.getAuthority());
+        assertEquals(PermaLinkPidProvider.PERMA_PROTOCOL, pid2.getProtocol());
+        //Verify that parsing the URL form works
+        GlobalId pid3 = PidUtil.parseAsGlobalID(pid2.asURL());
+        assertEquals(pid1String, pid3.asString());
+        assertEquals("perma1", pid3.getProviderId());
+
+        //Repeat the basics with a permalink associated with perma2
+        String pid4String = "perma:DANSLINK/QE-5A-XN55";
+        GlobalId pid5 = PidUtil.parseAsGlobalID(pid4String);
+        assertEquals("perma2", pid5.getProviderId());
+        assertEquals(pid4String, pid5.asString());
+        assertEquals("https://example.org/123/citation?persistentId=" + pid4String, pid5.asURL());
 
-        list.add(p);
-        PidUtil.addAllToProviderList(list);
-        GlobalId pid = new GlobalId(PermaLinkPidProviderServiceBean.PERMA_PROTOCOL,"DANSLINK","QE5A-XN55", "", p.getUrlPrefix(), PermaLinkPidProviderServiceBean.PERMA_PROVIDER_NAME);
-        System.out.println(pid.asString());
-        System.out.println(pid.asURL());
+    }
+
+    @Test
+    public void testDOIParsing() throws IOException {
 
-        GlobalId pid2 = PidUtil.parseAsGlobalID(pid.asString());
-        assertEquals(pid.asString(), pid2.asString());
-        GlobalId pid3 = PidUtil.parseAsGlobalID(pid.asURL());
-        assertEquals(pid.asString(), pid3.asString());
+        String pid1String = "doi:10.5073/FK2ABCDEF";
+        GlobalId pid2 = PidUtil.parseAsGlobalID(pid1String);
+        assertEquals(pid1String, pid2.asString());
+        assertEquals("dc1", pid2.getProviderId());
+        assertEquals("https://doi.org/" + pid2.getAuthority() + PidUtil.getPidProvider(pid2.getProviderId()).getSeparator() + pid2.getIdentifier(),pid2.asURL());
+        assertEquals("10.5073", pid2.getAuthority());
+        assertEquals(AbstractDOIProvider.DOI_PROTOCOL, pid2.getProtocol());
+        GlobalId pid3 = PidUtil.parseAsGlobalID(pid2.asURL());
+        assertEquals(pid1String, pid3.asString());
+        assertEquals("dc1", pid3.getProviderId());
+
+        String pid4String = "doi:10.5072/FK3ABCDEF";
+        GlobalId pid4 = PidUtil.parseAsGlobalID(pid4String);
+        assertEquals(pid4String, pid4.asString());
+        assertEquals("dc2", pid4.getProviderId());
+
+        String pid5String = "doi:10.5072/FK2ABCDEF";
+        GlobalId pid5 = PidUtil.parseAsGlobalID(pid5String);
+        assertEquals(pid5String, pid5.asString());
+        assertEquals("ez1", pid5.getProviderId());
+
+        String pid6String = "doi:10.5074/FKABCDEF";
+        GlobalId pid6 = PidUtil.parseAsGlobalID(pid6String);
+        assertEquals(pid6String, pid6.asString());
+        assertEquals("fake1", pid6.getProviderId());
+
+
+    }
+
+    @Test
+    public void testHandleParsing() throws IOException {
+
+        String pid1String = "hdl:20.500.1234/10052";
+        GlobalId pid2 = PidUtil.parseAsGlobalID(pid1String);
+        assertEquals(pid1String, pid2.asString());
+        assertEquals("hdl1", pid2.getProviderId());
+        assertEquals("https://hdl.handle.net/" + pid2.getAuthority() + PidUtil.getPidProvider(pid2.getProviderId()).getSeparator() + pid2.getIdentifier(),pid2.asURL());
+        assertEquals("20.500.1234", pid2.getAuthority());
+        assertEquals(HandlePidProvider.HDL_PROTOCOL, pid2.getProtocol());
+        GlobalId pid3 = PidUtil.parseAsGlobalID(pid2.asURL());
+        assertEquals(pid1String, pid3.asString());
+        assertEquals("hdl1", pid3.getProviderId());
+    }
 
+    @Test
+    public void testUnmanagedParsing() throws IOException {
+        // A handle not managed by the hdl1 provider
+        String pid1String = "hdl:20.500.3456/10052";
+        GlobalId pid2 = PidUtil.parseAsGlobalID(pid1String);
+        assertEquals(pid1String, pid2.asString());
+        //Only parsed by the unmanaged provider
+        assertEquals(UnmanagedHandlePidProvider.ID, pid2.getProviderId());
+        assertEquals(HandlePidProvider.HDL_RESOLVER_URL + pid2.getAuthority() + PidUtil.getPidProvider(pid2.getProviderId()).getSeparator() + pid2.getIdentifier(),pid2.asURL());
+        assertEquals("20.500.3456", pid2.getAuthority());
+        assertEquals(HandlePidProvider.HDL_PROTOCOL, pid2.getProtocol());
+        GlobalId pid3 = PidUtil.parseAsGlobalID(pid2.asURL());
+        assertEquals(pid1String, pid3.asString());
+        assertEquals(UnmanagedHandlePidProvider.ID, pid3.getProviderId());
+
+        //Same for DOIs
+        String pid5String = "doi:10.6083/FK2ABCDEF";
+        GlobalId pid5 = PidUtil.parseAsGlobalID(pid5String);
+        assertEquals(pid5String, pid5.asString());
+        assertEquals(UnmanagedDOIProvider.ID, pid5.getProviderId());
+
+        //And Permalinks
+        String pid6String = "perma:NOTDANSQEABCDEF";
+        GlobalId pid6 = PidUtil.parseAsGlobalID(pid6String);
+        assertEquals(pid6String, pid6.asString());
+        assertEquals(UnmanagedPermaLinkPidProvider.ID, pid6.getProviderId());
+
+    }
+
+    @Test
+    public void testExcludedSetParsing() throws IOException {
+
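+        // PIDs on a provider's excluded list should fall through to the unmanaged
+        // providers even when the authority/shoulder would otherwise match.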
+        String pid1String = "doi:10.5073/FK2123456";
+        GlobalId pid2 = PidUtil.parseAsGlobalID(pid1String);
+        assertEquals(pid1String, pid2.asString());
+        assertEquals(UnmanagedDOIProvider.ID, pid2.getProviderId());
+        assertEquals("https://doi.org/" + pid2.getAuthority() + PidUtil.getPidProvider(pid2.getProviderId()).getSeparator() + pid2.getIdentifier(),pid2.asURL());
+        assertEquals("10.5073", pid2.getAuthority());
+        assertEquals(AbstractDOIProvider.DOI_PROTOCOL, pid2.getProtocol());
+        GlobalId pid3 = PidUtil.parseAsGlobalID(pid2.asURL());
+        assertEquals(pid1String, pid3.asString());
+        assertEquals(UnmanagedDOIProvider.ID, pid3.getProviderId());
+
+        String pid4String = "perma:bad";
+        GlobalId pid4 = PidUtil.parseAsGlobalID(pid4String);
+        assertEquals(pid4String, pid4.asString());
+        assertEquals(UnmanagedPermaLinkPidProvider.ID, pid4.getProviderId());
+
+        String pid5String = "perma:DANSLINKQE123456";
+        GlobalId pid5 = PidUtil.parseAsGlobalID(pid5String);
+        assertEquals(pid5String, pid5.asString());
+        assertEquals(UnmanagedPermaLinkPidProvider.ID, pid5.getProviderId());
+
+        String pid6String = "perma:LINKIT123456";
+        GlobalId pid6 = PidUtil.parseAsGlobalID(pid6String);
+        assertEquals(pid6String, pid6.asString());
+        assertEquals(UnmanagedPermaLinkPidProvider.ID, pid6.getProviderId());
+
+
+    }
+
+    @Test
+    public void testManagedSetParsing() throws IOException {
+
+        String pid1String = "doi:10.5073/FK3ABCDEF";
+        GlobalId pid2 = PidUtil.parseAsGlobalID(pid1String);
+        assertEquals(pid1String, pid2.asString());
+        assertEquals("fake1", pid2.getProviderId());
+        assertEquals("https://doi.org/" + pid2.getAuthority() + PidUtil.getPidProvider(pid2.getProviderId()).getSeparator() + pid2.getIdentifier(),pid2.asURL());
+        assertEquals("10.5073", pid2.getAuthority());
+        assertEquals(AbstractDOIProvider.DOI_PROTOCOL, pid2.getProtocol());
+        GlobalId pid3 = PidUtil.parseAsGlobalID(pid2.asURL());
+        assertEquals(pid1String, pid3.asString());
+        assertEquals("fake1", pid3.getProviderId());
+        assertFalse(PidUtil.getPidProvider(pid3.getProviderId()).canCreatePidsLike(pid3));
+
+        String pid4String = "hdl:20.20.20/FK2ABCDEF";
+        GlobalId pid4 = PidUtil.parseAsGlobalID(pid4String);
+        assertEquals(pid4String, pid4.asString());
+        assertEquals("hdl1", pid4.getProviderId());
+        assertFalse(PidUtil.getPidProvider(pid4.getProviderId()).canCreatePidsLike(pid4));
+
+        String pid5String = "perma:LINKIT/FK2ABCDEF";
+        GlobalId pid5 = PidUtil.parseAsGlobalID(pid5String);
+        assertEquals(pid5String, pid5.asString());
+        assertEquals("perma2", pid5.getProviderId());
+        assertFalse(PidUtil.getPidProvider(pid5.getProviderId()).canCreatePidsLike(pid5));
+    }
+
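+    // Resolution order exercised below: a provider that can mint PIDs like the
+    // dataset's own wins, otherwise the owning Dataverse's pidGeneratorSpecs
+    // apply, and specs that match no configured provider yield null.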
+    @Test
+    public void testFindingPidGenerators() throws IOException {
+
+        Dataset dataset1 = new Dataset();
+        Dataverse dataverse1 = new Dataverse();
+        dataset1.setOwner(dataverse1);
+        String pidGeneratorSpecs = Json.createObjectBuilder().add("protocol", AbstractDOIProvider.DOI_PROTOCOL).add("authority","10.5072").add("shoulder", "FK2").build().toString();
+        //Set a PID generator on the parent
+        dataverse1.setPidGeneratorSpecs(pidGeneratorSpecs);
+        assertEquals(pidGeneratorSpecs, dataverse1.getPidGeneratorSpecs());
+        //Verify that the parent's PID generator is the effective one
+        assertEquals("ez1", dataverse1.getEffectivePidGenerator().getId());
+        assertEquals("ez1", dataset1.getEffectivePidGenerator().getId());
+        //Change dataset to have a provider and verify that it is used instead of any effective one
+        dataset1.setAuthority("10.5073");
+        dataset1.setProtocol(AbstractDOIProvider.DOI_PROTOCOL);
+        dataset1.setIdentifier("FK2ABCDEF");
+        //Reset to get rid of the cached @Transient value
+        dataset1.setPidGenerator(null);
+        assertEquals("dc1", dataset1.getGlobalId().getProviderId());
+        assertEquals("dc1", dataset1.getEffectivePidGenerator().getId());
+        assertTrue(PidUtil.getPidProvider(dataset1.getEffectivePidGenerator().getId()).canCreatePidsLike(dataset1.getGlobalId()));
+
+        dataset1.setPidGenerator(null);
+        //Now set the identifier so that the provider has this one in its managed list (and therefore we can't mint new PIDs in the same auth/shoulder), and therefore we get the effective pid generator
+        dataset1.setIdentifier("FK3ABCDEF");
+        assertEquals("fake1", dataset1.getGlobalId().getProviderId());
+        assertEquals("ez1", dataset1.getEffectivePidGenerator().getId());
+
+        //Now test the failure case
+        dataverse1.setPidGenerator(null);
+        dataset1.setPidGenerator(null);
+        pidGeneratorSpecs = Json.createObjectBuilder().add("protocol", AbstractDOIProvider.DOI_PROTOCOL).add("authority","10.9999").add("shoulder", "FK2").build().toString();
+        //Set a PID generator on the parent
+        dataverse1.setPidGeneratorSpecs(pidGeneratorSpecs);
+        assertEquals(pidGeneratorSpecs, dataverse1.getPidGeneratorSpecs());
+        //Verify that no PID generator is found for these specs
+        assertNull(dataverse1.getEffectivePidGenerator());
+        assertNull(dataset1.getEffectivePidGenerator());
+    }
+
+    @Test
+    @JvmSetting(key = JvmSettings.LEGACY_DATACITE_MDS_API_URL, value = "https://mds.test.datacite.org/")
+    @JvmSetting(key = JvmSettings.LEGACY_DATACITE_REST_API_URL, value = "https://api.test.datacite.org")
+    @JvmSetting(key = JvmSettings.LEGACY_DATACITE_USERNAME, value = "test2")
+    @JvmSetting(key = JvmSettings.LEGACY_DATACITE_PASSWORD, value = "changeme2")
+    public void testLegacyConfig() throws IOException {
+        MockitoAnnotations.openMocks(this);
+        Mockito.when(settingsServiceBean.getValueForKey(SettingsServiceBean.Key.DoiProvider)).thenReturn("DataCite");
+        Mockito.when(settingsServiceBean.getValueForKey(SettingsServiceBean.Key.Shoulder)).thenReturn("FK2");
+
+        Mockito.when(settingsServiceBean.getValueForKey(SettingsServiceBean.Key.Protocol)).thenReturn("doi");
+        Mockito.when(settingsServiceBean.getValueForKey(SettingsServiceBean.Key.Authority)).thenReturn("10.5075");
+
+        String protocol = settingsServiceBean.getValueForKey(SettingsServiceBean.Key.Protocol);
+        String authority = settingsServiceBean.getValueForKey(SettingsServiceBean.Key.Authority);
+        String shoulder = settingsServiceBean.getValueForKey(SettingsServiceBean.Key.Shoulder);
+        String provider = settingsServiceBean.getValueForKey(SettingsServiceBean.Key.DoiProvider);
+
+        if (protocol != null && authority != null && shoulder != null && provider != null) {
+            // This line is different than in PidProviderFactoryBean because here we've
+            // already added the unmanaged providers, so we can't look for null
+            if (!PidUtil.getPidProvider(protocol, authority, shoulder).canManagePID()) {
+                PidProvider legacy = null;
+                // Try to add a legacy provider
+                String identifierGenerationStyle = settingsServiceBean
+                        .getValueForKey(SettingsServiceBean.Key.IdentifierGenerationStyle, "random");
+                String dataFilePidFormat = settingsServiceBean.getValueForKey(SettingsServiceBean.Key.DataFilePIDFormat,
+                        "DEPENDENT");
+                switch (provider) {
+                case "EZID":
+                    /*
+                     * String baseUrl = JvmSettings.PID_EZID_BASE_URL.lookup(String.class); String
+                     * username = JvmSettings.PID_EZID_USERNAME.lookup(String.class); String
+                     * password = JvmSettings.PID_EZID_PASSWORD.lookup(String.class);
+                     * legacy = new EZIdDOIProvider("legacy", "legacy", authority,
+                     * shoulder, identifierGenerationStyle, dataFilePidFormat, "", "", baseUrl,
+                     * username, password);
+                     */
+                    break;
+                case "DataCite":
+                    String mdsUrl = JvmSettings.LEGACY_DATACITE_MDS_API_URL.lookup(String.class);
+                    String restUrl = JvmSettings.LEGACY_DATACITE_REST_API_URL.lookup(String.class);
+                    String dcUsername = JvmSettings.LEGACY_DATACITE_USERNAME.lookup(String.class);
+                    String dcPassword = JvmSettings.LEGACY_DATACITE_PASSWORD.lookup(String.class);
+                    if (mdsUrl != null && restUrl != null && dcUsername != null && dcPassword != null) {
+                        legacy = new DataCiteDOIProvider("legacy", "legacy", authority, shoulder,
+                                identifierGenerationStyle, dataFilePidFormat, "", "", mdsUrl, restUrl, dcUsername,
+                                dcPassword);
+                    }
+                    break;
+                case "FAKE":
+                    System.out.println("Legacy FAKE found");
+                    legacy = new FakeDOIProvider("legacy", "legacy", authority, shoulder,
+                            identifierGenerationStyle, dataFilePidFormat, "", "");
+                    break;
+                }
+                if (legacy != null) {
+                    // Not testing parts that require this bean
+                    legacy.setPidProviderServiceBean(null);
+                    PidUtil.addToProviderList(legacy);
+                }
+            } else {
+                System.out.println("Legacy PID provider settings found - ignored since a provider for the same protocol, authority, shoulder has been registered");
+            }
+
+        }
+
+        String pid1String = "doi:10.5075/FK2ABCDEF";
+        GlobalId pid2 = PidUtil.parseAsGlobalID(pid1String);
+        assertEquals(pid1String, pid2.asString());
+        assertEquals("legacy", pid2.getProviderId());
+    }
 }
diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteProviderTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteProviderTest.java
new file mode 100644
index 00000000000..572fc722272
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteProviderTest.java
@@ -0,0 +1,187 @@
+package edu.harvard.iq.dataverse.pidproviders.doi.datacite;
+
+import edu.harvard.iq.dataverse.Dataset;
+import edu.harvard.iq.dataverse.DatasetField;
+import edu.harvard.iq.dataverse.DatasetFieldConstant;
+import edu.harvard.iq.dataverse.DatasetFieldType;
+import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.DatasetVersion.VersionState;
+import edu.harvard.iq.dataverse.DataverseServiceBean;
+import edu.harvard.iq.dataverse.GlobalId;
+import edu.harvard.iq.dataverse.branding.BrandingUtil;
+import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean;
+import edu.harvard.iq.dataverse.pidproviders.PidUtil;
+import edu.harvard.iq.dataverse.settings.JvmSettings;
+import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
+import edu.harvard.iq.dataverse.util.SystemConfig;
+import edu.harvard.iq.dataverse.util.testing.JvmSetting;
+import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.extension.ExtendWith;
+
+import org.mockito.Mockito;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+import static org.junit.jupiter.api.Assertions.*;
+import static org.mockito.ArgumentMatchers.any;
+
+@ExtendWith(MockitoExtension.class)
+@LocalJvmSettings
+@JvmSetting(key = JvmSettings.SITE_URL, value = "https://example.com")
+
+public class DataCiteProviderTest {
+
+    static DataverseServiceBean dataverseSvc;
+    static SettingsServiceBean settingsSvc;
+    static PidProviderFactoryBean pidService;
+    static final String DEFAULT_NAME = "LibraScholar";
+
+    @BeforeAll
+    public static void setupMocks() {
+        dataverseSvc = Mockito.mock(DataverseServiceBean.class);
+        settingsSvc = Mockito.mock(SettingsServiceBean.class);
+        BrandingUtil.injectServices(dataverseSvc, settingsSvc);
+
+        // initial values (needed here for other tests where this method is reused!)
+        Mockito.when(settingsSvc.getValueForKey(SettingsServiceBean.Key.InstallationName)).thenReturn(DEFAULT_NAME);
+        Mockito.when(dataverseSvc.getRootDataverseName()).thenReturn(DEFAULT_NAME);
+
+        pidService = Mockito.mock(PidProviderFactoryBean.class);
+        Mockito.when(pidService.isGlobalIdLocallyUnique(any(GlobalId.class))).thenReturn(true);
+        Mockito.when(pidService.getProducer()).thenReturn("RootDataverse");
+
+    }
+
+    /**
+     * Useful for testing but requires DataCite credentials, etc.
+     *
+     * To run the test:
+     * export DataCiteUsername=test2
+     * export DataCitePassword=changeme2
+     * export DataCiteAuthority=10.5072
+     * export DataCiteShoulder=FK2
+     *
+     * then run mvn test -Dtest=DataCiteProviderTest
+     *
+     * For each run of the test, one test DOI will be created and will remain in
+     * the registered state, as visible on Fabrica at doi.test.datacite.org
+     * (two DOIs are created, but one is deleted after being created in the draft
+     * state and never made findable.)
+     */
+    @Test
+    @Disabled
+    public void testDoiLifecycle() throws IOException {
+        String username = System.getenv("DataCiteUsername");
+        String password = System.getenv("DataCitePassword");
+        String authority = System.getenv("DataCiteAuthority");
+        String shoulder = System.getenv("DataCiteShoulder");
+        DataCiteDOIProvider provider = new DataCiteDOIProvider("test", "test", authority, shoulder, "randomString",
+                SystemConfig.DataFilePIDFormat.DEPENDENT.toString(), "", "", "https://mds.test.datacite.org",
+                "https://api.test.datacite.org", username, password);
+
+        provider.setPidProviderServiceBean(pidService);
+
+        PidUtil.addToProviderList(provider);
+
+        Dataset d = new Dataset();
+        DatasetVersion dv = new DatasetVersion();
+        DatasetFieldType primitiveDSFType = new DatasetFieldType(DatasetFieldConstant.title,
+                DatasetFieldType.FieldType.TEXT, false);
+        DatasetField testDatasetField = new DatasetField();
+
+        dv.setVersionState(VersionState.DRAFT);
+
+        testDatasetField.setDatasetVersion(dv);
+        testDatasetField.setDatasetFieldType(primitiveDSFType);
+        testDatasetField.setSingleValue("First Title");
+        List<DatasetField> fields = new ArrayList<>();
+        fields.add(testDatasetField);
+        dv.setDatasetFields(fields);
+        ArrayList<DatasetVersion> dsvs = new ArrayList<>();
+        dsvs.add(0, dv);
+        d.setVersions(dsvs);
+
+        assertEquals(d.getCurrentName(), "First Title");
+
+        provider.generatePid(d);
+        assertEquals(d.getProtocol(), "doi");
+        assertEquals(d.getAuthority(), authority);
+        assertTrue(d.getIdentifier().startsWith(shoulder));
+        d.getGlobalId();
+
+        try {
+            provider.createIdentifier(d);
+            d.setIdentifierRegistered(true);
+        } catch (Exception e) {
+            // TODO Auto-generated catch block
+            e.printStackTrace();
+        }
+        assertEquals(DataCiteDOIProvider.DRAFT, provider.getPidStatus(d));
+        Map<String, String> mdMap = provider.getIdentifierMetadata(d);
+        assertEquals("First Title", mdMap.get("datacite.title"));
+
+        testDatasetField.setSingleValue("Second Title");
+
+        //Modify called for a draft dataset shouldn't update DataCite (given current code)
+        try {
+            provider.modifyIdentifierTargetURL(d);
+        } catch (Exception e) {
+            // TODO Auto-generated catch block
+            e.printStackTrace();
+        }
+        //Verify the title hasn't changed
+        mdMap = provider.getIdentifierMetadata(d);
+        assertEquals("First Title", mdMap.get("datacite.title"));
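+        // From here the test walks the DOI through the rest of its lifecycle:
+        // delete while draft removes the record, publicizeIdentifier() makes it
+        // findable, and a delete after that only demotes it to registered.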
assertEquals("First Title", mdMap.get("datacite.title")); + //Check our local status + assertEquals(DataCiteDOIProvider.DRAFT, provider.getPidStatus(d)); + //Now delete the identifier + provider.deleteIdentifier(d); + //Causes a 404 and a caught exception that prints a stack trace. + mdMap = provider.getIdentifierMetadata(d); + // And verify the record is gone (no title, should be no entries at all) + assertEquals(null, mdMap.get("datacite.title")); + + //Now recreate and publicize in one step + assertTrue(provider.publicizeIdentifier(d)); + d.getLatestVersion().setVersionState(VersionState.RELEASED); + + //Verify the title hasn't changed + mdMap = provider.getIdentifierMetadata(d); + assertEquals("Second Title", mdMap.get("datacite.title")); + //Check our local status + assertEquals(DataCiteDOIProvider.FINDABLE, provider.getPidStatus(d)); + + //Verify that modify does update a published/findable record + testDatasetField.setSingleValue("Third Title"); + + try { + provider.modifyIdentifierTargetURL(d); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + mdMap = provider.getIdentifierMetadata(d); + assertEquals("Third Title", mdMap.get("datacite.title")); + + //Now delete the identifier . Once it's been findable, this should just flip the record to registered + //Not sure that can be easily verified in the test, but it will be visible in Fabrica + provider.deleteIdentifier(d); + d.getLatestVersion().setVersionState(VersionState.DEACCESSIONED); + + mdMap = provider.getIdentifierMetadata(d); + assertEquals("This item has been removed from publication", mdMap.get("datacite.title")); + + //Check our local status - just uses the version state + assertEquals(DataCiteDOIProvider.REGISTERED, provider.getPidStatus(d)); + + // provider.registerWhenPublished() + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlUtilTest.java index 8c9e0261bfa..da94b288bee 100644 --- a/src/test/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/privateurl/PrivateUrlUtilTest.java @@ -102,7 +102,7 @@ public void testGetDatasetFromRoleAssignmentSuccess() { RoleAssignment ra = this.createTestRoleAssignment(dataset); assertNotNull(PrivateUrlUtil.getDatasetFromRoleAssignment(ra)); - assertEquals("#42", ra.getAssigneeIdentifier()); + assertEquals(PrivateUrlUser.PREFIX + "42", ra.getAssigneeIdentifier()); } @Test @@ -137,7 +137,7 @@ public void testGetDraftDatasetVersionFromRoleAssignmentSuccess() { DatasetVersion datasetVersionOut = PrivateUrlUtil.getDraftDatasetVersionFromRoleAssignment(ra); assertNotNull(datasetVersionOut); - assertEquals("#42", ra.getAssigneeIdentifier()); + assertEquals(PrivateUrlUser.PREFIX + "42", ra.getAssigneeIdentifier()); } @Test diff --git a/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java index ce6005a3d11..92b06e5936f 100644 --- a/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/search/IndexServiceBeanTest.java @@ -1,19 +1,10 @@ package edu.harvard.iq.dataverse.search; -import edu.harvard.iq.dataverse.ControlledVocabularyValue; -import edu.harvard.iq.dataverse.DOIServiceBean; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetFieldServiceBean; -import 
edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.Dataverse.DataverseType; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.GlobalId; -import edu.harvard.iq.dataverse.MetadataBlock; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.mocks.MocksFactory; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; @@ -21,6 +12,7 @@ import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.apache.solr.common.SolrInputDocument; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -30,8 +22,7 @@ import org.mockito.junit.jupiter.MockitoExtension; import java.io.IOException; -import java.util.Arrays; -import java.util.Set; +import java.util.*; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -108,9 +99,43 @@ public void TestIndexing() throws SolrServerException, IOException { assertTrue(indexedFields.contains("language")); } + @Test + public void testValidateBoundingBox() throws SolrServerException, IOException { + final IndexableDataset indexableDataset = createIndexableDataset(); + final DatasetVersion datasetVersion = indexableDataset.getDatasetVersion(); + DatasetField dsf = new DatasetField(); + DatasetFieldType dsft = new DatasetFieldType(DatasetFieldConstant.geographicBoundingBox, DatasetFieldType.FieldType.TEXT, true); + dsf.setDatasetFieldType(dsft); + + List vals = new LinkedList<>(); + DatasetFieldCompoundValue val = new DatasetFieldCompoundValue(); + val.setParentDatasetField(dsf); + val.setChildDatasetFields(Arrays.asList( + constructBoundingBoxValue(DatasetFieldConstant.westLongitude, "34.9"), // bad value. 
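+        // With west > east the bounding box is invalid, so the resulting Solr
+        // document should contain neither the derived geolocation nor boundingBox fields.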
+        final SolrInputDocuments docs = indexService.toSolrDocs(indexableDataset, null);
+        Optional<SolrInputDocument> doc = docs.getDocuments().stream().findFirst();
+        assertTrue(doc.isPresent());
+        assertTrue(!doc.get().containsKey("geolocation"));
+        assertTrue(!doc.get().containsKey("boundingBox"));
+    }
+
+    private DatasetField constructBoundingBoxValue(String datasetFieldTypeName, String value) {
+        DatasetField retVal = new DatasetField();
+        retVal.setDatasetFieldType(new DatasetFieldType(datasetFieldTypeName, DatasetFieldType.FieldType.TEXT, false));
+        retVal.setDatasetFieldValues(Collections.singletonList(new DatasetFieldValue(retVal, value)));
+        return retVal;
+    }
+
     private IndexableDataset createIndexableDataset() {
         final Dataset dataset = MocksFactory.makeDataset();
-        dataset.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL,"10.666", "FAKE/fake", "/", DOIServiceBean.DOI_RESOLVER_URL, null));
+        dataset.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL,"10.666", "FAKE/fake", "/", AbstractDOIProvider.DOI_RESOLVER_URL, null));
         final DatasetVersion datasetVersion = dataset.getCreateVersion(null);
         DatasetField field = createCVVField("language", "English", false);
         datasetVersion.getDatasetFields().add(field);
diff --git a/src/test/java/edu/harvard/iq/dataverse/settings/JvmSettingsTest.java b/src/test/java/edu/harvard/iq/dataverse/settings/JvmSettingsTest.java
index 6b03f20fc41..f4494b7116e 100644
--- a/src/test/java/edu/harvard/iq/dataverse/settings/JvmSettingsTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/settings/JvmSettingsTest.java
@@ -17,22 +17,15 @@ void lookupSetting() {
     }
 
     @Test
-    @SystemProperty(key = "doi.username", value = "test")
-    void lookupSettingViaAlias() {
-        assertEquals("test", JvmSettings.DATACITE_USERNAME.lookup());
+    @SystemProperty(key = "dataverse.pid.datacite.datacite.username", value = "test")
+    void lookupPidProviderSetting() {
+        assertEquals("test", JvmSettings.DATACITE_USERNAME.lookup("datacite"));
     }
 
     @Test
-    @SystemProperty(key = "doi.baseurlstring", value = "test")
+    @SystemProperty(key = "dataverse.ingest.rserve.port", value = "1234")
     void lookupSettingViaAliasWithDefaultInMPCFile() {
-        assertEquals("test", JvmSettings.DATACITE_MDS_API_URL.lookup());
-    }
-
-    @Test
-    @SystemProperty(key = "doi.dataciterestapiurlstring", value = "foo")
-    @SystemProperty(key = "doi.mdcbaseurlstring", value = "bar")
-    void lookupSettingViaAliasWithDefaultInMPCFileAndTwoAliases() {
-        assertEquals("foo", JvmSettings.DATACITE_REST_API_URL.lookup());
+        assertEquals("1234", JvmSettings.RSERVE_PORT.lookup());
     }
 }
\ No newline at end of file
diff --git a/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java
index 41032ffa811..f17cb825986 100644
--- a/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/sitemap/SiteMapUtilTest.java
@@ -1,29 +1,32 @@
 package edu.harvard.iq.dataverse.sitemap;
 
-import edu.harvard.iq.dataverse.DOIServiceBean;
 import edu.harvard.iq.dataverse.Dataset;
 import edu.harvard.iq.dataverse.DatasetVersion;
 import edu.harvard.iq.dataverse.Dataverse;
 import edu.harvard.iq.dataverse.GlobalId;
 import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
+import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider;
 import edu.harvard.iq.dataverse.util.xml.XmlPrinter;
 import edu.harvard.iq.dataverse.util.xml.XmlValidator;
 import java.io.File;
 import java.io.IOException;
 import java.net.URL;
+import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.sql.Timestamp;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
 import static org.junit.jupiter.api.Assertions.*;
-import static org.junit.jupiter.api.Assertions.assertTrue;
+import org.apache.commons.lang3.StringUtils;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -32,6 +35,10 @@
 
 class SiteMapUtilTest {
 
+    // see https://www.sitemaps.org/protocol.html#validating
+    final String xsdSitemap = "https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd";
+    final String xsdSitemapIndex = "https://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd";
+
     @TempDir
     Path tempDir;
     Path tempDocroot;
@@ -66,14 +73,14 @@ void testUpdateSiteMap() throws IOException, ParseException, SAXException {
         List<Dataset> datasets = new ArrayList<>();
 
         Dataset published = new Dataset();
-        published.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, "10.666", "FAKE/published1", null, DOIServiceBean.DOI_RESOLVER_URL, null));
+        published.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL, "10.666", "FAKE/published1", null, AbstractDOIProvider.DOI_RESOLVER_URL, null));
         String publishedPid = published.getGlobalId().asString();
         published.setPublicationDate(new Timestamp(new Date().getTime()));
         published.setModificationTime(new Timestamp(new Date().getTime()));
         datasets.add(published);
 
         Dataset unpublished = new Dataset();
-        unpublished.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, "10.666", "FAKE/unpublished1", null, DOIServiceBean.DOI_RESOLVER_URL, null));
+        unpublished.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL, "10.666", "FAKE/unpublished1", null, AbstractDOIProvider.DOI_RESOLVER_URL, null));
         String unpublishedPid = unpublished.getGlobalId().asString();
 
         Timestamp nullPublicationDateToIndicateNotPublished = null;
@@ -81,14 +88,14 @@ void testUpdateSiteMap() throws IOException, ParseException, SAXException {
         datasets.add(unpublished);
 
         Dataset harvested = new Dataset();
-        harvested.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, "10.666", "FAKE/harvested1", null, DOIServiceBean.DOI_RESOLVER_URL, null));
+        harvested.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL, "10.666", "FAKE/harvested1", null, AbstractDOIProvider.DOI_RESOLVER_URL, null));
         String harvestedPid = harvested.getGlobalId().asString();
         harvested.setPublicationDate(new Timestamp(new Date().getTime()));
         harvested.setHarvestedFrom(new HarvestingClient());
         datasets.add(harvested);
 
         Dataset deaccessioned = new Dataset();
-        deaccessioned.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL, "10.666", "FAKE/deaccessioned1", null, DOIServiceBean.DOI_RESOLVER_URL, null));
+        deaccessioned.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL, "10.666", "FAKE/deaccessioned1", null, AbstractDOIProvider.DOI_RESOLVER_URL, null));
         String deaccessionedPid = deaccessioned.getGlobalId().asString();
         deaccessioned.setPublicationDate(new Timestamp(new Date().getTime()));
@@ -105,7 +112,7 @@ void testUpdateSiteMap() throws IOException, ParseException, SAXException {
 
         // then
         String pathToSiteMap = tempDocroot.resolve("sitemap").resolve("sitemap.xml").toString();
         assertDoesNotThrow(() -> XmlValidator.validateXmlWellFormed(pathToSiteMap));
-        assertTrue(XmlValidator.validateXmlSchema(pathToSiteMap, new URL("https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd")));
+        assertTrue(XmlValidator.validateXmlSchema(pathToSiteMap, new URL(xsdSitemap)));
 
         File sitemapFile = new File(pathToSiteMap);
         String sitemapString = XmlPrinter.prettyPrintXml(new String(Files.readAllBytes(Paths.get(sitemapFile.getAbsolutePath()))));
@@ -116,7 +123,108 @@ void testUpdateSiteMap() throws IOException, ParseException, SAXException {
         assertFalse(sitemapString.contains(unpublishedPid));
         assertFalse(sitemapString.contains(harvestedPid));
         assertFalse(sitemapString.contains(deaccessionedPid));
+    }
+
+    @Test
+    void testHugeSiteMap() throws IOException, ParseException, SAXException {
+        // given
+        final int nbDataverse = 50;
+        final int nbDataset = SiteMapUtil.SITEMAP_LIMIT;
+        final Timestamp now = new Timestamp(new Date().getTime());
+        // Regex to validate a dataset URL inside a <loc> tag
+        final String sitemapUrlRegex = ".*/dataset\\.xhtml\\?persistentId=doi:10\\.666/FAKE/published[0-9]{1,5}</loc>$";
+        // Regex to validate a sitemap URL inside a <loc> tag: must include "/sitemap/" to be accessible because there is no pretty-faces rewrite
+        final String sitemapIndexUrlRegex = ".*/sitemap/sitemap[1-2]\\.xml</loc>$";
+        final String today = LocalDateTime.now().format(DateTimeFormatter.ofPattern(SiteMapUtil.DATE_PATTERN));
+
+        final List<Dataverse> dataverses = new ArrayList<>(nbDataverse);
+        for (int i = 1; i <= nbDataverse; i++) {
+            final Dataverse publishedDataverse = new Dataverse();
+            publishedDataverse.setAlias(String.format("publishedDv%s", i));
+            publishedDataverse.setModificationTime(now);
+            publishedDataverse.setPublicationDate(now);
+            dataverses.add(publishedDataverse);
+        }
+
+        final List<Dataset> datasets = new ArrayList<>(nbDataset);
+        for (int i = 1; i <= nbDataset; i++) {
+            final Dataset published = new Dataset();
+            published.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL, "10.666", String.format("FAKE/published%s", i), null, AbstractDOIProvider.DOI_RESOLVER_URL, null));
+            published.setPublicationDate(now);
+            published.setModificationTime(now);
+            datasets.add(published);
+        }
+
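+        // nbDataset equals SiteMapUtil.SITEMAP_LIMIT, so with the 50 dataverses on
+        // top the generator must spill into two sitemap files plus a
+        // sitemap_index.xml; the assertions below verify exactly that layout.
+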
+        // when
+        SiteMapUtil.updateSiteMap(dataverses, datasets);
+
+        // then
+        final Path siteMapDir = tempDocroot.resolve("sitemap");
+        final String pathToSiteMapIndexFile = siteMapDir.resolve("sitemap_index.xml").toString();
+        final String pathToSiteMap1File = siteMapDir.resolve("sitemap1.xml").toString();
+        final String pathToSiteMap2File = siteMapDir.resolve("sitemap2.xml").toString();
+
+        // validate sitemap_index.xml file with XSD
+        assertDoesNotThrow(() -> XmlValidator.validateXmlWellFormed(pathToSiteMapIndexFile));
+        assertTrue(XmlValidator.validateXmlSchema(pathToSiteMapIndexFile, new URL(xsdSitemapIndex)));
+
+        // verify sitemap_index.xml content
+        File sitemapFile = new File(pathToSiteMapIndexFile);
+        String sitemapString = XmlPrinter.prettyPrintXml(new String(Files.readAllBytes(Paths.get(sitemapFile.getAbsolutePath())), StandardCharsets.UTF_8));
+        // System.out.println("sitemap: " + sitemapString);
+
+        String[] lines = sitemapString.split("\n");
+        for (int i = 0; i < lines.length; i++) {
+            String line = lines[i].strip();
+            if (StringUtils.isNotBlank(line)) {
+                if (i == 0) {
+                    assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", line);
+                } else if (i == 1) {
+                    assertEquals("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">", line);
+                } else if (i == 2) {
+                    assertEquals("<sitemap>", line);
+                } else if (line.startsWith("<loc>")) {
+                    final String errorWithSitemapIndexUrl = String.format("Sitemap URL must match with \"%s\" but was \"%s\"", sitemapIndexUrlRegex, line);
+                    assertTrue(line.matches(sitemapIndexUrlRegex), errorWithSitemapIndexUrl);
+                } else if (line.startsWith("<lastmod>")) {
+                    assertEquals(String.format("<lastmod>%s</lastmod>", today), line);
+                }
+            }
+        }
+
+        // validate sitemap1.xml file with XSD
+        assertDoesNotThrow(() -> XmlValidator.validateXmlWellFormed(pathToSiteMap1File));
+        assertTrue(XmlValidator.validateXmlSchema(pathToSiteMap1File, new URL(xsdSitemap)));
+
+        // validate sitemap2.xml file with XSD
+        assertDoesNotThrow(() -> XmlValidator.validateXmlWellFormed(pathToSiteMap2File));
+        assertTrue(XmlValidator.validateXmlSchema(pathToSiteMap2File, new URL(xsdSitemap)));
+
+        // verify sitemap2.xml content
+        sitemapFile = new File(pathToSiteMap2File);
+        sitemapString = XmlPrinter.prettyPrintXml(new String(Files.readAllBytes(Paths.get(sitemapFile.getAbsolutePath())), StandardCharsets.UTF_8));
+
+        lines = sitemapString.split("\n");
+        assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", lines[0].strip());
+        assertEquals("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">", lines[1].strip());
+        boolean isContainsLocTag = false;
+        boolean isContainsLastmodTag = false;
+        // loop over 10 lines only, just need to validate the <loc> and <lastmod> tags
+        for (int i = 5; i < 15; i++) {
+            String line = lines[i].strip();
+            if (StringUtils.isNotBlank(line)) {
+                if (line.startsWith("<loc>")) {
+                    isContainsLocTag = true;
+                    final String errorWithSitemapIndexUrl = String.format("Sitemap URL must match with \"%s\" but was \"%s\"", sitemapUrlRegex, line);
+                    assertTrue(line.matches(sitemapUrlRegex), errorWithSitemapIndexUrl);
+                } else if (line.startsWith("<lastmod>")) {
+                    isContainsLastmodTag = true;
+                    assertEquals(String.format("<lastmod>%s</lastmod>", today), line);
+                }
+            }
+        }
+        assertTrue(isContainsLocTag, "Sitemap file must contain a <loc> tag");
+        assertTrue(isContainsLastmodTag, "Sitemap file must contain a <lastmod> tag");
     }
 }
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java
index 2cfe9f25d7e..46359d7b02c 100644
--- a/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java
+++ b/src/test/java/edu/harvard/iq/dataverse/util/FileUtilTest.java
@@ -1,13 +1,6 @@
 package edu.harvard.iq.dataverse.util;
 
-import edu.harvard.iq.dataverse.DataFile;
-import edu.harvard.iq.dataverse.Dataset;
-import edu.harvard.iq.dataverse.DatasetVersion;
-import edu.harvard.iq.dataverse.Dataverse;
-import edu.harvard.iq.dataverse.Embargo;
-import edu.harvard.iq.dataverse.FileMetadata;
-import edu.harvard.iq.dataverse.Guestbook;
-import edu.harvard.iq.dataverse.TermsOfUseAndAccess;
+import edu.harvard.iq.dataverse.*;
 import edu.harvard.iq.dataverse.license.License;
 import edu.harvard.iq.dataverse.util.FileUtil.FileCitationExtension;
 
@@ -215,6 +208,42 @@ public void testIsPubliclyDownloadable3() {
         assertFalse(FileUtil.isPubliclyDownloadable(embargoedFileMetadata));
     }
 
+    @Test
+    public void testIsPubliclyDownloadable4() {
+
+        FileMetadata retentionFileMetadata = new FileMetadata();
+        DataFile df = new DataFile();
+        Retention r = new Retention();
+        r.setDateUnavailable(LocalDate.now().minusDays(1));
+        df.setRetention(r);
+        retentionFileMetadata.setDataFile(df);
+        DatasetVersion dsv = new DatasetVersion();
+        dsv.setVersionState(DatasetVersion.VersionState.RELEASED);
+        retentionFileMetadata.setDatasetVersion(dsv);
+        Dataset dataset = new Dataset();
+        dsv.setDataset(dataset);
+        retentionFileMetadata.setRestricted(false);
+        assertFalse(FileUtil.isPubliclyDownloadable(retentionFileMetadata));
+    }
+
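+    // Boundary behavior: a retention period that ended yesterday blocks public
+    // download (test above), while a dateUnavailable of today still allows it
+    // (test below).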
+    @Test
+    public void testIsPubliclyDownloadable5() {
+
+        FileMetadata retentionFileMetadata = new FileMetadata();
+        DataFile df = new DataFile();
+        Retention r = new Retention();
+        r.setDateUnavailable(LocalDate.now());
+        df.setRetention(r);
+        retentionFileMetadata.setDataFile(df);
+        DatasetVersion dsv = new DatasetVersion();
+        dsv.setVersionState(DatasetVersion.VersionState.RELEASED);
+        retentionFileMetadata.setDatasetVersion(dsv);
+        Dataset dataset = new Dataset();
+        dsv.setDataset(dataset);
+        retentionFileMetadata.setRestricted(false);
+        assertTrue(FileUtil.isPubliclyDownloadable(retentionFileMetadata));
+    }
+
     @Test
     public void testgetFileDownloadUrl() {
         Long fileId = 42l;
@@ -371,4 +400,45 @@ public void testHdf4File() throws IOException {
         assertEquals("application/octet-stream", contentType);
     }
 
+    @Test
+    public void testGZipFile() throws IOException {
+        String path = "src/test/resources/fits/";
+        String pathAndFile = path + "FOSy19g0309t_c2f.fits.gz";
+        File file = new File(pathAndFile);
+        String contentType = FileUtil.determineFileType(file, pathAndFile);
+        assertEquals("application/fits-gzipped", contentType);
+    }
+
+    @Test
+    public void testDetermineFileTypeROCrate() {
+        final String roCrateContentType = "application/ld+json; profile=\"http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate\"";
+        final DataFile rocrate = new DataFile(roCrateContentType);
+
+        assertEquals(roCrateContentType, rocrate.getContentType());
+        assertEquals("RO-Crate metadata", FileUtil.getUserFriendlyFileType(rocrate));
+        assertEquals("Metadata", FileUtil.getIndexableFacetFileType(rocrate));
+
+        final File roCrateFile = new File("src/test/resources/fileutil/ro-crate-metadata.json");
+        try {
+            assertEquals(roCrateContentType, FileUtil.determineFileType(roCrateFile, "ro-crate-metadata.json"));
+        } catch (IOException ex) {
+            fail(ex);
+        }
+
+        // test ";" removal
+        final String dockerFileWithProfile = "application/x-docker-file; profile=\"http://www.w3.org/ns/json-ld#flattened http://www.w3.org/ns/json-ld#compacted https://w3id.org/ro/crate\"";
+        final DataFile dockerDataFile = new DataFile(dockerFileWithProfile);
+
+        assertEquals(dockerFileWithProfile, dockerDataFile.getContentType());
+        assertEquals("Docker Image File", FileUtil.getUserFriendlyFileType(dockerDataFile));
+        assertEquals("Code", FileUtil.getIndexableFacetFileType(dockerDataFile));
+    }
+
+    @Test
+    public void testSanitizeFileName() {
+        assertEquals(null, FileUtil.sanitizeFileName(null));
+        assertEquals("with_space", FileUtil.sanitizeFileName("with space"));
+        assertEquals("withcomma", FileUtil.sanitizeFileName("with,comma"));
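+        // sanitizeFileName appears to replace spaces with "_" and strip other
+        // disallowed characters outright, as the final case shows: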
+        assertEquals("with.txt", FileUtil.sanitizeFileName("with,\\?:;,.txt"));
+    }
 }
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/MailSessionProducerIT.java b/src/test/java/edu/harvard/iq/dataverse/util/MailSessionProducerIT.java
new file mode 100644
index 00000000000..29b6598b1a9
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/MailSessionProducerIT.java
@@ -0,0 +1,270 @@
+package edu.harvard.iq.dataverse.util;
+
+import edu.harvard.iq.dataverse.DataverseServiceBean;
+import edu.harvard.iq.dataverse.MailServiceBean;
+import edu.harvard.iq.dataverse.branding.BrandingUtil;
+import edu.harvard.iq.dataverse.branding.BrandingUtilTest;
+import edu.harvard.iq.dataverse.settings.JvmSettings;
+import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
+import edu.harvard.iq.dataverse.util.testing.JvmSetting;
+import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings;
+import edu.harvard.iq.dataverse.util.testing.Tags;
+import io.restassured.RestAssured;
+import jakarta.mail.Session;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mockito;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+import org.testcontainers.utility.MountableFile;
+
+import java.util.Map;
+
+import static io.restassured.RestAssured.given;
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * An integration test using a fake SMTP MTA to check for outgoing mails.
+ * LIMITATION: This test cannot possibly check if the production and injection of the session via CDI
+ * works, as it is not running within a servlet container. This would require usage of Arquillian
+ * or an end-to-end API test with a deployed application.
+ */
+@Tag(Tags.INTEGRATION_TEST)
+@Tag(Tags.USES_TESTCONTAINERS)
+@Testcontainers(disabledWithoutDocker = true)
+@ExtendWith(MockitoExtension.class)
+@LocalJvmSettings
+@JvmSetting(key = JvmSettings.SYSTEM_EMAIL, value = "test@test.com")
+class MailSessionProducerIT {
+
+    private static final Integer PORT_SMTP = 1025;
+    private static final Integer PORT_HTTP = 1080;
+
+    static SettingsServiceBean settingsServiceBean = Mockito.mock(SettingsServiceBean.class);
+    static DataverseServiceBean dataverseServiceBean = Mockito.mock(DataverseServiceBean.class);
+
+    /**
+     * We need to reset the BrandingUtil mocks for every test, as we rely on them being set to default.
+     */
+    @BeforeAll
+    static void setUp() {
+        // Setup mocks behavior, inject as deps
+        BrandingUtil.injectServices(dataverseServiceBean, settingsServiceBean);
+    }
+
+    @AfterAll
+    static void tearDown() {
+        BrandingUtilTest.tearDownMocks();
+    }
+
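+    // Note for the nested configs below: the MTA host/port are not literals.
+    // @JvmSetting's "method" attribute resolves them at runtime from the running
+    // Testcontainer via tcSmtpHost() and tcSmtpPort().
+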
+ */ + @BeforeAll + static void setUp() { + // Setup mocks behavior, inject as deps + BrandingUtil.injectServices(dataverseServiceBean, settingsServiceBean); + } + @AfterAll + static void tearDown() { + BrandingUtilTest.tearDownMocks(); + } + + @Nested + @LocalJvmSettings + @JvmSetting(key = JvmSettings.MAIL_MTA_SETTING, method = "tcSmtpHost", varArgs = "host") + @JvmSetting(key = JvmSettings.MAIL_MTA_SETTING, method = "tcSmtpPort", varArgs = "port") + class WithoutAuthentication { + @Container + static GenericContainer maildev = new GenericContainer<>("maildev/maildev:2.1.0") + .withExposedPorts(PORT_HTTP, PORT_SMTP) + .waitingFor(Wait.forHttp("/")); + + static String tcSmtpHost() { + return maildev.getHost(); + } + + static String tcSmtpPort() { + return maildev.getMappedPort(PORT_SMTP).toString(); + } + + @BeforeAll + static void setup() { + RestAssured.baseURI = "http://" + tcSmtpHost(); + RestAssured.port = maildev.getMappedPort(PORT_HTTP); + } + + @Test + void createSession() { + given().when().get("/email") + .then() + .statusCode(200) + .body("size()", is(0)); + + // given + Session session = new MailSessionProducer().getSession(); + MailServiceBean mailer = new MailServiceBean(session, settingsServiceBean); + + // when + boolean sent = mailer.sendSystemEmail("test@example.org", "Test", "Test", false); + + // then + assertTrue(sent); + //RestAssured.get("/email").body().prettyPrint(); + given().when().get("/email") + .then() + .statusCode(200) + .body("size()", is(1)) + .body("[0].subject", equalTo("Test")); + } + + } + + @Nested + @LocalJvmSettings + @JvmSetting(key = JvmSettings.MAIL_MTA_SETTING, method = "tcSmtpHost", varArgs = "host") + @JvmSetting(key = JvmSettings.MAIL_MTA_SETTING, method = "tcSmtpPort", varArgs = "port") + @JvmSetting(key = JvmSettings.MAIL_MTA_SETTING, varArgs = "ssl.enable", value = "true") + @JvmSetting(key = JvmSettings.MAIL_MTA_SETTING, varArgs = "ssl.trust", value = "*") + class WithSSLWithoutAuthentication { + @Container + static GenericContainer maildev = new GenericContainer<>("maildev/maildev:2.1.0") + .withCopyFileToContainer(MountableFile.forClasspathResource("mail/cert.pem"), "/cert.pem") + .withCopyFileToContainer(MountableFile.forClasspathResource("mail/key.pem"), "/key.pem") + .withExposedPorts(PORT_HTTP, PORT_SMTP) + .withEnv(Map.of( + "MAILDEV_INCOMING_SECURE", "true", + "MAILDEV_INCOMING_CERT", "/cert.pem", + "MAILDEV_INCOMING_KEY", "/key.pem" + )) + .waitingFor(Wait.forHttp("/")); + + static String tcSmtpHost() { + return maildev.getHost(); + } + + static String tcSmtpPort() { + return maildev.getMappedPort(PORT_SMTP).toString(); + } + + @BeforeAll + static void setup() { + RestAssured.baseURI = "http://" + tcSmtpHost(); + RestAssured.port = maildev.getMappedPort(PORT_HTTP); + } + + @Test + void createSession() { + given().when().get("/email") + .then() + .statusCode(200) + .body("size()", is(0)); + + // given + Session session = new MailSessionProducer().getSession(); + MailServiceBean mailer = new MailServiceBean(session, settingsServiceBean); + + // when + boolean sent = mailer.sendSystemEmail("test@example.org", "Test", "Test", false); + + // then + assertTrue(sent); + //RestAssured.get("/email").body().prettyPrint(); + given().when().get("/email") + .then() + .statusCode(200) + .body("size()", is(1)) + .body("[0].subject", equalTo("Test")); + } + + } + + static final String username = "testuser"; + static final String password = "supersecret"; + + @Nested + @LocalJvmSettings + @JvmSetting(key = JvmSettings.MAIL_MTA_SETTING, method = 
"tcSmtpHost", varArgs = "host") + @JvmSetting(key = JvmSettings.MAIL_MTA_SETTING, method = "tcSmtpPort", varArgs = "port") + @JvmSetting(key = JvmSettings.MAIL_MTA_AUTH, value = "yes") + @JvmSetting(key = JvmSettings.MAIL_MTA_USER, value = username) + @JvmSetting(key = JvmSettings.MAIL_MTA_PASSWORD, value = password) + class WithAuthentication { + @Container + static GenericContainer maildev = new GenericContainer<>("maildev/maildev:2.1.0") + .withExposedPorts(PORT_HTTP, PORT_SMTP) + .withEnv(Map.of( + "MAILDEV_INCOMING_USER", username, + "MAILDEV_INCOMING_PASS", password + )) + .waitingFor(Wait.forHttp("/")); + + static String tcSmtpHost() { + return maildev.getHost(); + } + + static String tcSmtpPort() { + return maildev.getMappedPort(PORT_SMTP).toString(); + } + + @BeforeAll + static void setup() { + RestAssured.baseURI = "http://" + tcSmtpHost(); + RestAssured.port = maildev.getMappedPort(PORT_HTTP); + } + + @Test + void createSession() { + given().when().get("/email") + .then() + .statusCode(200) + .body("size()", is(0)); + + // given + Session session = new MailSessionProducer().getSession(); + MailServiceBean mailer = new MailServiceBean(session, settingsServiceBean); + + // when + boolean sent = mailer.sendSystemEmail("test@example.org", "Test", "Test", false); + + // then + assertTrue(sent); + //RestAssured.get("/email").body().prettyPrint(); + given().when().get("/email") + .then() + .statusCode(200) + .body("size()", is(1)) + .body("[0].subject", equalTo("Test")); + } + + } + + @Nested + @LocalJvmSettings + class InvalidConfiguration { + @Test + @JvmSetting(key = JvmSettings.MAIL_MTA_SETTING, value = "1234", varArgs = "invalid") + void invalidConfigItemsAreIgnoredOnSessionBuild() { + assertDoesNotThrow(() -> new MailSessionProducer().getSession()); + + Session mailSession = new MailSessionProducer().getSession(); + MailServiceBean mailer = new MailServiceBean(mailSession, settingsServiceBean); + assertFalse(mailer.sendSystemEmail("test@example.org", "Test", "Test", false)); + } + + @Test + @JvmSetting(key = JvmSettings.MAIL_MTA_SETTING, value = "foobar", varArgs = "host") + void invalidHostnameIsFailingWhenSending() { + assertDoesNotThrow(() -> new MailSessionProducer().getSession()); + + Session mailSession = new MailSessionProducer().getSession(); + MailServiceBean mailer = new MailServiceBean(mailSession, settingsServiceBean); + assertFalse(mailer.sendSystemEmail("test@example.org", "Test", "Test", false)); + } + + @Test + @JvmSetting(key = JvmSettings.MAIL_MTA_SETTING, varArgs = "port" , value = "foobar") + void invalidPortWithLetters() { + assertThrows(IllegalArgumentException.class, () -> new MailSessionProducer().getSession()); + } + } +} \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/MailUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/MailUtilTest.java index 205b1f0bfcf..f9236ab8338 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/MailUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/MailUtilTest.java @@ -2,6 +2,7 @@ import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.UserNotification; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -33,6 +34,7 @@ public void setUp() { } + /* @Test public void testParseSystemAddress() { assertEquals("support@librascholar.edu", 
MailUtil.parseSystemAddress("support@librascholar.edu").getAddress()); @@ -46,6 +48,7 @@ public void testParseSystemAddress() { assertEquals(null, MailUtil.parseSystemAddress("\"LibraScholar Support Team ")); assertEquals(null, MailUtil.parseSystemAddress("support1@dataverse.org, support@librascholar.edu")); } + */ @Test @Order(1) @@ -80,7 +83,12 @@ public void testSubjectRevokeRole() { @Test public void testSubjectRequestFileAccess() { userNotification.setType(UserNotification.Type.REQUESTFILEACCESS); - assertEquals("LibraScholar: Access has been requested for a restricted file in dataset \"\"", MailUtil.getSubjectTextBasedOnNotification(userNotification, null)); + AuthenticatedUser requestor = new AuthenticatedUser(); + requestor.setFirstName("Tom"); + requestor.setLastName("Jones"); + requestor.setUserIdentifier("TJ-1234"); + userNotification.setRequestor(requestor); + assertEquals("LibraScholar: Tom Jones (@TJ-1234) requested access to dataset \"\"", MailUtil.getSubjectTextBasedOnNotification(userNotification, null)); } @Test diff --git a/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java index d70a108e7c6..15905c2971b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/UrlTokenUtilTest.java @@ -1,12 +1,12 @@ package edu.harvard.iq.dataverse.util; -import edu.harvard.iq.dataverse.DOIServiceBean; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.testing.JvmSetting; import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; @@ -32,7 +32,7 @@ void testGetToolUrlWithOptionalQueryParameters() { DatasetVersion dv = new DatasetVersion(); Dataset ds = new Dataset(); ds.setId(50L); - ds.setGlobalId(new GlobalId(DOIServiceBean.DOI_PROTOCOL,"10.5072","FK2ABCDEF",null, DOIServiceBean.DOI_RESOLVER_URL, null)); + ds.setGlobalId(new GlobalId(AbstractDOIProvider.DOI_PROTOCOL,"10.5072","FK2ABCDEF",null, AbstractDOIProvider.DOI_RESOLVER_URL, null)); dv.setDataset(ds); fmd.setDatasetVersion(dv); List fmdl = new ArrayList<>(); diff --git a/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/FileDataProviderFactoryTest.java b/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/FileDataProviderFactoryTest.java index f43a0c78284..9fac4d42bcd 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/FileDataProviderFactoryTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/bagit/data/FileDataProviderFactoryTest.java @@ -23,21 +23,21 @@ public class FileDataProviderFactoryTest { public void should_return_FolderDataProvider_when_parameter_is_path() { FileDataProvider result = target.getFileDataProvider(Path.of(UUID.randomUUID().toString())); - MatcherAssert.assertThat(result.getClass().getName(), Matchers.is(FolderDataProvider.class.getName())); + MatcherAssert.assertThat("should return FolderDataProvider when parameter is path", result instanceof FolderDataProvider); } @Test public void should_return_ZipFileDataProvider_when_parameter_is_file() throws IOException { FileDataProvider result = 
target.getFileDataProvider(Path.of(FIXTURE_DIRECTORY, "FileDataProviderFactoryTest.zip").toFile()); - MatcherAssert.assertThat(result.getClass().getName(), Matchers.is(ZipFileDataProvider.class.getName())); + MatcherAssert.assertThat("should return ZipFileDataProvider when parameter is file", result instanceof ZipFileDataProvider); } @Test public void should_return_DataFileDataProvider_when_parameter_is_datafiles() { FileDataProvider result = target.getFileDataProvider("test-name", Collections.emptyList()); - MatcherAssert.assertThat(result.getClass().getName(), Matchers.is(DataFileDataProvider.class.getName())); + MatcherAssert.assertThat("should return DataFileDataProvider when parameter is datafiles", result instanceof DataFileDataProvider); } } \ No newline at end of file diff --git a/src/test/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBeanTest.java b/src/test/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBeanTest.java new file mode 100644 index 00000000000..89f04e0cd5a --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/util/cache/CacheFactoryBeanTest.java @@ -0,0 +1,334 @@ +package edu.harvard.iq.dataverse.util.cache; + +import com.hazelcast.cluster.Address; +import com.hazelcast.config.Config; +import com.hazelcast.core.Hazelcast; +import com.hazelcast.core.HazelcastInstance; +import com.hazelcast.map.IMap; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.GuestUser; +import edu.harvard.iq.dataverse.engine.command.Command; +import edu.harvard.iq.dataverse.engine.command.impl.ListDataverseContentCommand; +import edu.harvard.iq.dataverse.engine.command.impl.ListExplicitGroupsCommand; +import edu.harvard.iq.dataverse.engine.command.impl.ListFacetsCommand; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.Tags; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.parallel.ResourceLock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; + +import javax.cache.Cache; +import javax.cache.CacheManager; +import javax.cache.configuration.CacheEntryListenerConfiguration; +import javax.cache.configuration.Configuration; +import javax.cache.integration.CompletionListener; +import javax.cache.processor.EntryProcessor; +import javax.cache.processor.EntryProcessorException; +import javax.cache.processor.EntryProcessorResult; +import java.io.IOException; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; + +@ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.LENIENT) +public class CacheFactoryBeanTest { + private SystemConfig mockedSystemConfig; + static CacheFactoryBean cache = null; + + AuthenticatedUser authUser = new AuthenticatedUser(); + GuestUser guestUser = GuestUser.get(); + static final String settingDefaultCapacity = "30,60,120"; + public String getJsonSetting() { + return """ + [ + { + "tier": 0, + "limitPerHour": 10, + "actions": [ + "GetLatestPublishedDatasetVersionCommand", + "GetPrivateUrlCommand", + "GetDatasetCommand", + "GetLatestAccessibleDatasetVersionCommand" + ] + }, + { + "tier": 
0,
+                "limitPerHour": 1,
+                "actions": [
+                  "CreateGuestbookResponseCommand",
+                  "UpdateDatasetVersionCommand",
+                  "DestroyDatasetCommand",
+                  "DeleteDataFileCommand",
+                  "FinalizeDatasetPublicationCommand",
+                  "PublishDatasetCommand"
+                ]
+              },
+              {
+                "tier": 1,
+                "limitPerHour": 30,
+                "actions": [
+                  "CreateGuestbookResponseCommand",
+                  "GetLatestPublishedDatasetVersionCommand",
+                  "GetPrivateUrlCommand",
+                  "GetDatasetCommand",
+                  "GetLatestAccessibleDatasetVersionCommand",
+                  "UpdateDatasetVersionCommand",
+                  "DestroyDatasetCommand",
+                  "DeleteDataFileCommand",
+                  "FinalizeDatasetPublicationCommand",
+                  "PublishDatasetCommand"
+                ]
+              }
+            ]""";
+    }
+    @BeforeEach
+    public void init() throws IOException {
+        // Reuse cache and config for all tests
+        if (cache == null) {
+            mockedSystemConfig = mock(SystemConfig.class);
+            doReturn(settingDefaultCapacity).when(mockedSystemConfig).getRateLimitingDefaultCapacityTiers();
+            doReturn(getJsonSetting()).when(mockedSystemConfig).getRateLimitsJson();
+            cache = new CacheFactoryBean();
+            cache.systemConfig = mockedSystemConfig;
+            if (cache.rateLimitCache == null) {
+                cache.rateLimitCache = new TestCache(getConfig());
+            }
+
+            // Clear the static data, so it can be reloaded with the new mocked data
+            RateLimitUtil.rateLimitMap.clear();
+            RateLimitUtil.rateLimits.clear();
+        }
+
+        // Reset to default auth user
+        authUser.setRateLimitTier(1);
+        authUser.setSuperuser(false);
+        authUser.setUserIdentifier("authUser");
+    }
+
+    @AfterAll
+    public static void cleanup() {
+        Hazelcast.shutdownAll();
+    }
+    @Test
+    public void testGuestUserGettingRateLimited() {
+        Command action = new ListDataverseContentCommand(null, null);
+        boolean rateLimited = false;
+        int cnt = 0;
+        for (; cnt < 100; cnt++) {
+            rateLimited = !cache.checkRate(guestUser, action);
+            if (rateLimited) {
+                break;
+            }
+        }
+        String key = RateLimitUtil.generateCacheKey(guestUser, action.getClass().getSimpleName());
+        assertTrue(cache.rateLimitCache.containsKey(key));
+        assertTrue(rateLimited && cnt > 1 && cnt <= 30, "rateLimited:" + rateLimited + " cnt:" + cnt);
+    }
+
+    @Test
+    public void testAdminUserExemptFromGettingRateLimited() {
+        Command action = new ListExplicitGroupsCommand(null, null);
+        authUser.setSuperuser(true);
+        authUser.setUserIdentifier("admin");
+        boolean rateLimited = false;
+        int cnt = 0;
+        for (; cnt < 100; cnt++) {
+            rateLimited = !cache.checkRate(authUser, action);
+            if (rateLimited) {
+                break;
+            }
+        }
+        assertFalse(rateLimited);
+        assertTrue(cnt >= 99, "cnt:" + cnt);
+    }
+
+    @Test
+    @Tag(Tags.NOT_ESSENTIAL_UNITTESTS)
+    @ResourceLock(value = "cache")
+    public void testAuthenticatedUserGettingRateLimited() throws InterruptedException {
+        Command action = new ListFacetsCommand(null, null);
+        authUser.setRateLimitTier(2); // 120 calls per hour - 1 token added every 30 seconds
+        boolean rateLimited = false;
+        int cnt;
+        for (cnt = 0; cnt < 200; cnt++) {
+            rateLimited = !cache.checkRate(authUser, action);
+            if (rateLimited) {
+                break;
+            }
+        }
+        assertTrue(rateLimited);
+        assertEquals(120, cnt);
+
+        for (cnt = 0; cnt < 60; cnt++) {
+            Thread.sleep(1000); // Wait for bucket to be replenished (check each second for 1 minute max)
+            rateLimited = !cache.checkRate(authUser, action);
+            if (!rateLimited) {
+                break;
+            }
+        }
+        assertFalse(rateLimited, "rateLimited:" + rateLimited + " cnt:" + cnt);
+
+        // Now change the user's tier, so it is no longer limited
+        authUser.setRateLimitTier(3); // tier 3 = no limit
+        for (cnt = 0; cnt < 200; cnt++) {
+            rateLimited = !cache.checkRate(authUser, action);
+            if (rateLimited) {
+                break;
+            }
+        }
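+        // Tier 3 has no entry in the default capacity tiers ("30,60,120" covers tiers 0-2 only),
+        // so every one of the 200 calls should pass unthrottled: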
+        assertFalse(rateLimited);
+        assertEquals(200, cnt);
+    }
+
+    private Config getConfig() {
+        return getConfig(null);
+    }
+    private Config getConfig(String members) {
+        Config config = new Config();
+        config.getNetworkConfig().getJoin().getAutoDetectionConfig().setEnabled(false);
+        config.getNetworkConfig().getJoin().getMulticastConfig().setEnabled(false);
+        config.getNetworkConfig().getJoin().getAwsConfig().setEnabled(false);
+        config.getNetworkConfig().getJoin().getAzureConfig().setEnabled(false);
+        config.getNetworkConfig().getJoin().getTcpIpConfig().setEnabled(true);
+        if (members != null) {
+            config.getNetworkConfig().getJoin().getAutoDetectionConfig().setEnabled(true);
+            config.getNetworkConfig().getJoin().getTcpIpConfig().addMember(members);
+        }
+        return config;
+    }
+
+    // convert Hazelcast IMap to JCache Cache
+    private class TestCache implements Cache<String, String> {
+        HazelcastInstance hzInstance;
+        IMap<String, String> cache;
+        TestCache(Config config) {
+            hzInstance = Hazelcast.newHazelcastInstance(config);
+            cache = hzInstance.getMap("test");
+            Address address = hzInstance.getCluster().getLocalMember().getAddress();
+            cache.put("memberAddress", String.format("%s:%d", address.getHost(), address.getPort()));
+        }
+        @Override
+        public String get(String s) {
+            return cache.get(s);
+        }
+        @Override
+        public Map<String, String> getAll(Set<? extends String> set) {
+            return null;
+        }
+        @Override
+        public boolean containsKey(String s) {
+            return get(s) != null;
+        }
+        @Override
+        public void loadAll(Set<? extends String> set, boolean b, CompletionListener completionListener) {
+
+        }
+        @Override
+        public void put(String s, String s2) {
+            cache.put(s, s2);
+        }
+        @Override
+        public String getAndPut(String s, String s2) {
+            return null;
+        }
+        @Override
+        public void putAll(Map<? extends String, ? extends String> map) {
+
+        }
+        @Override
+        public boolean putIfAbsent(String s, String s2) {
+            return false;
+        }
+        @Override
+        public boolean remove(String s) {
+            return false;
+        }
+        @Override
+        public boolean remove(String s, String s2) {
+            return false;
+        }
+        @Override
+        public String getAndRemove(String s) {
+            return null;
+        }
+        @Override
+        public boolean replace(String s, String s2, String v1) {
+            return false;
+        }
+        @Override
+        public boolean replace(String s, String s2) {
+            return false;
+        }
+        @Override
+        public String getAndReplace(String s, String s2) {
+            return null;
+        }
+        @Override
+        public void removeAll(Set<? extends String> set) {
+
+        }
+        @Override
+        public void removeAll() {
+
+        }
+        @Override
+        public void clear() {
+            cache.clear();
+        }
+        @Override
+        public <C extends Configuration<String, String>> C getConfiguration(Class<C> aClass) {
+            return null;
+        }
+        @Override
+        public <T> T invoke(String s, EntryProcessor<String, String, T> entryProcessor, Object... objects) throws EntryProcessorException {
+            return null;
+        }
+        @Override
+        public <T> Map<String, EntryProcessorResult<T>> invokeAll(Set<? extends String> set, EntryProcessor<String, String, T> entryProcessor, Object... objects) {
+            return null;
+        }
+        @Override
+        public String getName() {
+            return null;
+        }
+        @Override
+        public CacheManager getCacheManager() {
+            return null;
+        }
+        @Override
+        public void close() {
+            hzInstance.shutdown();
+        }
+        @Override
+        public boolean isClosed() {
+            return false;
+        }
+        @Override
+        public <T> T unwrap(Class<T> aClass) {
+            return null;
+        }
+        @Override
+        public void registerCacheEntryListener(CacheEntryListenerConfiguration<String, String> cacheEntryListenerConfiguration) {
+
+        }
+        @Override
+        public void deregisterCacheEntryListener(CacheEntryListenerConfiguration<String, String> cacheEntryListenerConfiguration) {
+
+        }
+        @Override
+        public Iterator<Cache.Entry<String, String>> iterator() {
+            return null;
+        }
+    }
+}
diff --git a/src/test/java/edu/harvard/iq/dataverse/util/cache/RateLimitUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/cache/RateLimitUtilTest.java
new file mode 100644
index 00000000000..5ddcc190993
--- /dev/null
+++ b/src/test/java/edu/harvard/iq/dataverse/util/cache/RateLimitUtilTest.java
@@ -0,0 +1,152 @@
+package edu.harvard.iq.dataverse.util.cache;
+
+import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
+import edu.harvard.iq.dataverse.authorization.users.GuestUser;
+import edu.harvard.iq.dataverse.authorization.users.User;
+import edu.harvard.iq.dataverse.util.SystemConfig;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.mockito.junit.jupiter.MockitoSettings;
+import org.mockito.quality.Strictness;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.mockito.Mockito.*;
+
+@ExtendWith(MockitoExtension.class)
+@MockitoSettings(strictness = Strictness.LENIENT)
+public class RateLimitUtilTest {
+
+    static SystemConfig mockedSystemConfig = mock(SystemConfig.class);
+    static SystemConfig mockedSystemConfigBad = mock(SystemConfig.class);
+
+    static String getJsonSetting() {
+        return """
+            [
+              {
+                "tier": 0,
+                "limitPerHour": 10,
+                "actions": [
+                  "GetLatestPublishedDatasetVersionCommand",
+                  "GetPrivateUrlCommand",
+                  "GetDatasetCommand",
+                  "GetLatestAccessibleDatasetVersionCommand"
+                ]
+              },
+              {
+                "tier": 0,
+                "limitPerHour": 1,
+                "actions": [
+                  "CreateGuestbookResponseCommand",
+                  "UpdateDatasetVersionCommand",
+                  "DestroyDatasetCommand",
+                  "DeleteDataFileCommand",
+                  "FinalizeDatasetPublicationCommand",
+                  "PublishDatasetCommand"
+                ]
+              },
+              {
+                "tier": 1,
+                "limitPerHour": 30,
+                "actions": [
+                  "CreateGuestbookResponseCommand",
+                  "GetLatestPublishedDatasetVersionCommand",
+                  "GetPrivateUrlCommand",
+                  "GetDatasetCommand",
+                  "GetLatestAccessibleDatasetVersionCommand",
+                  "UpdateDatasetVersionCommand",
+                  "DestroyDatasetCommand",
+                  "DeleteDataFileCommand",
+                  "FinalizeDatasetPublicationCommand",
+                  "PublishDatasetCommand"
+                ]
+              }
+            ]""";
+    }
+    static final String settingJsonBad = "{\n";
+
+    @BeforeAll
+    public static void setUp() {
+        doReturn(getJsonSetting()).when(mockedSystemConfig).getRateLimitsJson();
+        doReturn("100,200").when(mockedSystemConfig).getRateLimitingDefaultCapacityTiers();
+        doReturn(settingJsonBad).when(mockedSystemConfigBad).getRateLimitsJson();
+        doReturn("100,200").when(mockedSystemConfigBad).getRateLimitingDefaultCapacityTiers();
+    }
+    @BeforeEach
+    public void resetRateLimitUtilSettings() {
+        RateLimitUtil.rateLimitMap.clear();
+        RateLimitUtil.rateLimits.clear();
+    }
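+    // Each @CsvSource row is (expected capacity, tier, action): a null action exercises
+    // getCapacityByTier(), and an action name absent from the JSON setting falls back to the tier default.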
+    @ParameterizedTest
+    @CsvSource(value = {
+        "100,0,",
+        "200,1,",
+        "1,0,DestroyDatasetCommand",
+        "100,0,Default Limit",
+        "30,1,DestroyDatasetCommand",
+        "200,1,Default Limit",
+        "-1,2,Default No Limit"
+    })
+    void testConfig(int exp, int tier, String action) {
+        if (action == null) {
+            assertEquals(exp, RateLimitUtil.getCapacityByTier(mockedSystemConfig, tier));
+        } else {
+            assertEquals(exp, RateLimitUtil.getCapacityByTierAndAction(mockedSystemConfig, tier, action));
+        }
+    }
+    @ParameterizedTest
+    @CsvSource(value = {
+        "100,0,",
+        "200,1,",
+        "100,0,GetLatestAccessibleDatasetVersionCommand",
+        "200,1,GetLatestAccessibleDatasetVersionCommand",
+        "-1,2,GetLatestAccessibleDatasetVersionCommand"
+    })
+    void testBadJson(int exp, int tier, String action) {
+        if (action == null) {
+            assertEquals(exp, RateLimitUtil.getCapacityByTier(mockedSystemConfigBad, tier));
+        } else {
+            assertEquals(exp, RateLimitUtil.getCapacityByTierAndAction(mockedSystemConfigBad, tier, action));
+        }
+    }
+
+    @Test
+    public void testGenerateCacheKey() {
+        User user = GuestUser.get();
+        assertEquals(":guest:action1", RateLimitUtil.generateCacheKey(user, "action1"));
+    }
+    @Test
+    public void testGetCapacity() {
+        SystemConfig config = mock(SystemConfig.class);
+        resetRateLimitUtil(config, true);
+
+        GuestUser guestUser = GuestUser.get();
+        assertEquals(10, RateLimitUtil.getCapacity(config, guestUser, "GetPrivateUrlCommand"));
+
+        AuthenticatedUser authUser = new AuthenticatedUser();
+        authUser.setRateLimitTier(1);
+        assertEquals(30, RateLimitUtil.getCapacity(config, authUser, "GetPrivateUrlCommand"));
+        authUser.setSuperuser(true);
+        assertEquals(RateLimitUtil.NO_LIMIT, RateLimitUtil.getCapacity(config, authUser, "GetPrivateUrlCommand"));
+
+        // no setting means rate limiting is not on
+        resetRateLimitUtil(config, false);
+
+        assertEquals(RateLimitUtil.NO_LIMIT, RateLimitUtil.getCapacity(config, guestUser, "GetPrivateUrlCommand"));
+        assertEquals(RateLimitUtil.NO_LIMIT, RateLimitUtil.getCapacity(config, guestUser, "xyz"));
+        assertEquals(RateLimitUtil.NO_LIMIT, RateLimitUtil.getCapacity(config, authUser, "GetPrivateUrlCommand"));
+        assertEquals(RateLimitUtil.NO_LIMIT, RateLimitUtil.getCapacity(config, authUser, "abc"));
+        authUser.setRateLimitTier(99);
+        assertEquals(RateLimitUtil.NO_LIMIT, RateLimitUtil.getCapacity(config, authUser, "def"));
+    }
+    private void resetRateLimitUtil(SystemConfig config, boolean enable) {
+        doReturn(enable ?
"100,200" : "").when(config).getRateLimitingDefaultCapacityTiers(); + RateLimitUtil.rateLimitMap.clear(); + RateLimitUtil.rateLimits.clear(); + } +} diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/BriefJsonPrinterTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/BriefJsonPrinterTest.java index b426f84a464..fc458d88acd 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/BriefJsonPrinterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/BriefJsonPrinterTest.java @@ -1,19 +1,16 @@ package edu.harvard.iq.dataverse.util.json; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetFieldConstant; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.MetadataBlock; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.mocks.MocksFactory; import edu.harvard.iq.dataverse.workflow.Workflow; import jakarta.json.JsonObject; import org.junit.jupiter.api.Test; import java.util.Collections; +import java.util.List; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; /** * @@ -58,14 +55,16 @@ public void testJson_MetadataBlock() { mtb.setId(1L); mtb.setName("metadata_block_name"); mtb.setDisplayName("Metadata Block Name"); + mtb.setDatasetFieldTypes(List.of(new DatasetFieldType("JustAString", DatasetFieldType.FieldType.TEXT, false))); BriefJsonPrinter sut = new BriefJsonPrinter(); JsonObject res = sut.json(mtb).build(); assertEquals("Metadata Block Name", res.getString("displayName")); - assertEquals("metadata_block_name", res.getString("name")); - assertEquals(1, res.getInt("id")); - assertEquals(3, res.keySet().size()); + assertEquals("metadata_block_name", res.getString("name")); + assertFalse(res.getBoolean("displayOnCreate")); + assertEquals(1, res.getInt("id")); + assertEquals(4, res.keySet().size()); } /** diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonDatasetVersion.txt b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonDatasetVersion.txt index 22eb9af39f1..a144ca9c9b6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonDatasetVersion.txt +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonDatasetVersion.txt @@ -232,16 +232,16 @@ "typeName": "eastLongitude", "value": "23" }, - "northLongitude": { + "northLatitude": { "multiple": false, "typeClass": "primitive", - "typeName": "northLongitude", + "typeName": "northLatitude", "value": "786" }, - "southLongitude": { + "southLatitude": { "multiple": false, "typeClass": "primitive", - "typeName": "southLongitude", + "typeName": "southLatitude", "value": "34" }, "westLongitude": { diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java index 1d054040e84..11da71e1980 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonPrinterTest.java @@ -114,7 +114,7 @@ public void testJson_RoleAssignment() { JsonObjectBuilder job = JsonPrinter.json(ra); assertNotNull(job); JsonObject jsonObject = job.build(); - assertEquals("#42", jsonObject.getString("assignee")); + assertEquals(PrivateUrlUser.PREFIX + "42", jsonObject.getString("assignee")); assertEquals(123, jsonObject.getInt("definitionPointId")); assertEquals("e1d53cf6-794a-457a-9709-7c07629a8267", 
jsonObject.getString("privateUrlToken")); } @@ -135,7 +135,7 @@ public void testJson_PrivateUrl() { assertEquals("e1d53cf6-794a-457a-9709-7c07629a8267", jsonObject.getString("token")); assertEquals("https://dataverse.example.edu/privateurl.xhtml?token=e1d53cf6-794a-457a-9709-7c07629a8267", jsonObject.getString("link")); assertEquals("e1d53cf6-794a-457a-9709-7c07629a8267", jsonObject.getJsonObject("roleAssignment").getString("privateUrlToken")); - assertEquals("#42", jsonObject.getJsonObject("roleAssignment").getString("assignee")); + assertEquals(PrivateUrlUser.PREFIX + "42", jsonObject.getJsonObject("roleAssignment").getString("assignee")); } @Test @@ -158,8 +158,10 @@ public void testGetFileCategories() { emb.setDateAvailable(LocalDate.parse("2021-12-03")); emb.setReason("Some reason"); dataFile.setEmbargo(emb); + dsVersion.setId(Long.MIN_VALUE); fmd.setDatasetVersion(dsVersion); fmd.setDataFile(dataFile); + fmd.setVersion(Long.MIN_VALUE); List fileCategories = new ArrayList<>(); DataFileCategory dataFileCategory = new DataFileCategory(); dataFileCategory.setName("Data"); diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonUtilTest.java index 725862db7ba..3e4f9a690d2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonUtilTest.java @@ -3,17 +3,17 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import org.junit.jupiter.api.Test; -public class JsonUtilTest { +class JsonUtilTest { @Test - public void testPrettyPrint() { - JsonUtil jsonUtil = new JsonUtil(); + void testPrettyPrint() { String nullString = null; assertEquals(null, JsonUtil.prettyPrint(nullString)); assertEquals("", JsonUtil.prettyPrint("")); assertEquals("junk", JsonUtil.prettyPrint("junk")); - assertEquals("{}", JsonUtil.prettyPrint("{}")); - assertEquals("{\n" + " \"foo\": \"bar\"\n" + "}", JsonUtil.prettyPrint("{\"foo\": \"bar\"}")); + assertEquals("{\n}", JsonUtil.prettyPrint("{}")); + assertEquals("[\n \"junk\"\n]", JsonUtil.prettyPrint("[\"junk\"]")); + assertEquals("{\n" + " \"foo\": \"bar\"\n" + "}", JsonUtil.prettyPrint("{\"foo\": \"bar\"}")); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/jsondata.txt b/src/test/java/edu/harvard/iq/dataverse/util/json/jsondata.txt index 1edf864e6e4..590a8663c86 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/jsondata.txt +++ b/src/test/java/edu/harvard/iq/dataverse/util/json/jsondata.txt @@ -210,16 +210,16 @@ "typeName": "eastLongitude", "value": "23" }, - "northLongitude": { + "northLatitude": { "multiple": false, "typeClass": "primitive", - "typeName": "northLongitude", + "typeName": "northLatitude", "value": "786" }, - "southLongitude": { + "southLatitude": { "multiple": false, "typeClass": "primitive", - "typeName": "southLongitude", + "typeName": "southLatitude", "value": "34" }, "westLongitude": { diff --git a/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java b/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java index b93028b6365..f0e538616b2 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java @@ -144,9 +144,40 @@ public void testCreateZippedNonShapefile() throws IOException{ msg("Passed!"); } - - - + + + @Test + public void testShapefileWithQpjAndQmd() throws IOException { + 
msgt("(4) testShapefileWithQpjAndQmd"); + + // Create mock files for the new extensions + List fileNames = Arrays.asList("testShape.shp", "testShape.shx", "testShape.dbf", "testShape.prj", "testShape.qpj", "testShape.qmd"); + + // Create a zip file with these files + File zipFile = createAndZipFiles(fileNames, "testShapeWithNewExtensions.zip"); + + // Pass the zip to the ShapefileHandler + ShapefileHandler shpHandler = new ShapefileHandler(new FileInputStream(zipFile)); + shpHandler.DEBUG = true; + + // Check if it is recognized as a shapefile + assertTrue(shpHandler.containsShapefile(), "The zip should contain a shapefile with the new extensions"); + + // Get file groups map and verify presence + Map> fileGroups = shpHandler.getFileGroups(); + assertFalse(fileGroups.isEmpty(), "The file groups map should not be empty"); + + // Ensure the specific extensions are present + assertTrue(fileGroups.containsKey("testShape"), "The file group should contain the key 'testShape'"); + assertTrue(fileGroups.get("testShape").containsAll(Arrays.asList("shp", "shx", "dbf", "prj", "qpj", "qmd")), "The file group should include the new extensions .qpj and .qmd"); + + // Delete the test zip file + zipFile.delete(); + + msg("Test passed successfully!"); + } + + @Test public void testZippedTwoShapefiles() throws IOException{ msgt("(2) testZippedTwoShapefiles"); diff --git a/src/test/java/edu/harvard/iq/dataverse/validation/EMailValidatorTest.java b/src/test/java/edu/harvard/iq/dataverse/validation/EMailValidatorTest.java index 0cbc9e52759..614cdae2310 100644 --- a/src/test/java/edu/harvard/iq/dataverse/validation/EMailValidatorTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/validation/EMailValidatorTest.java @@ -1,5 +1,8 @@ package edu.harvard.iq.dataverse.validation; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; @@ -14,7 +17,8 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -public class EMailValidatorTest { +@LocalJvmSettings +class EMailValidatorTest { private final Validator validator = Validation.buildDefaultValidatorFactory().getValidator(); @@ -82,4 +86,27 @@ public void testConstraint(boolean expected, String mail) { violations.stream().findFirst().ifPresent( c -> { assertTrue(c.getMessage().contains(mail)); }); } + + public static Stream emailAsciiUtf8Examples() { + return Stream.of( + Arguments.of("false", "pete@mailinator.com"), + Arguments.of("false", "foobar@mail.science"), + Arguments.of("true", "lótus.gonçalves@éxample.com"), + Arguments.of("true", "begüm.vriezen@example.cologne") + ); + } + + @ParameterizedTest + @MethodSource("emailAsciiUtf8Examples") + @JvmSetting(key = JvmSettings.MAIL_MTA_SUPPORT_UTF8, value = "false") + void validateWhenMTADoesNotSupportUTF8(boolean needsUTF8Support, String mail) { + assertEquals(!needsUTF8Support, EMailValidator.isEmailValid(mail)); + } + + @ParameterizedTest + @MethodSource("emailAsciiUtf8Examples") + @JvmSetting(key = JvmSettings.MAIL_MTA_SUPPORT_UTF8, value = "true") + void validateWhenMTASupportsUTF8(boolean needsUTF8Support, String mail) { + assertTrue(EMailValidator.isEmailValid(mail)); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/validation/URLValidatorTest.java 
b/src/test/java/edu/harvard/iq/dataverse/validation/URLValidatorTest.java index 8c29b609c9b..a344d6a600d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/validation/URLValidatorTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/validation/URLValidatorTest.java @@ -29,7 +29,9 @@ public static Stream stdUrlExamples() { Arguments.of(true, "http://foobar.com:9101"), Arguments.of(true, "ftp://user@foobar.com"), Arguments.of(false, "cnn.com"), - Arguments.of(false, "smb://user@foobar.com") + Arguments.of(false, "smb://user@foobar.com"), + // case of a real permalink that requires UrlValidator.ALLOW_2_SLASHES + Arguments.of(true, "https://archive.softwareheritage.org/swh:1:dir:561bfe6698ca9e58b552b4eb4e56132cac41c6f9;origin=https://github.com/gem-pasteur/macsyfinder;visit=swh:1:snp:1bde3cb370766b10132c4e004c7cb377979928d1;anchor=swh:1:rev:868637fce184865d8e0436338af66a2648e8f6e1") ); } diff --git a/src/test/resources/fits/FOSy19g0309t_c2f.fits.gz b/src/test/resources/fits/FOSy19g0309t_c2f.fits.gz new file mode 100644 index 00000000000..15ee3d5d323 Binary files /dev/null and b/src/test/resources/fits/FOSy19g0309t_c2f.fits.gz differ diff --git a/src/test/resources/images/bad.tiff b/src/test/resources/images/bad.tiff new file mode 100644 index 00000000000..c8b403f1728 Binary files /dev/null and b/src/test/resources/images/bad.tiff differ diff --git a/src/test/resources/images/good.tiff b/src/test/resources/images/good.tiff new file mode 100644 index 00000000000..417e39558f5 Binary files /dev/null and b/src/test/resources/images/good.tiff differ diff --git a/src/test/resources/json/cvoc-agroportal.json b/src/test/resources/json/cvoc-agroportal.json new file mode 100644 index 00000000000..03c9e2f4d07 --- /dev/null +++ b/src/test/resources/json/cvoc-agroportal.json @@ -0,0 +1,76 @@ +[ + { + "field-name": "keyword", + "term-uri-field": "keywordTermURL", + "cvoc-url": "https://data.agroportal.lirmm.fr/", + "js-url": "https://domain.tld/assets/cvoc/ontoportal.js", + "headers": { + "Authorization": "apikey token=XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" + }, + "protocol": "ontoportal", + "retrieval-uri": "https://data.agroportal.lirmm.fr/ontologies/{keywordVocabulary}/classes/{encodeUrl:keywordTermURL}?language=en,fr", + "term-parent-uri": "", + "allow-free-text": true, + "languages": "en, fr", + "vocabs": { + "AGROVOC": { + "vocabularyUri": "https://data.agroportal.lirmm.fr/ontologies/AGROVOC", + "uriSpace": "http" + }, + "ONTOBIOTOPE": { + "vocabularyUri": "https://data.agroportal.lirmm.fr/ontologies/ONTOBIOTOPE", + "uriSpace": "http" + }, + "CROPUSAGE": { + "vocabularyUri": "https://data.agroportal.lirmm.fr/ontologies/CROPUSAGE", + "uriSpace": "http" + } + }, + "managed-fields": { + "vocabularyName": "keywordVocabulary", + "termName": "keywordValue", + "vocabularyUri": "keywordVocabularyURI" + }, + "retrieval-filtering": { + "@context": { + "termName": "https://schema.org/name", + "vocabularyName": "https://dataverse.org/schema/vocabularyName", + "vocabularyUri": "https://dataverse.org/schema/vocabularyUri", + "lang": "@language", + "value": "@value" + }, + "@id": { + "pattern": "{0}", + "params": [ + "@id" + ] + }, + "termName": { + "pattern": "{0}", + "params": [ + "/prefLabel" + ], + "indexIn": "keywordValue" + }, + "vocabularyName": { + "pattern": "{0}", + "params": [ + "/links/ontology" + ] + }, + "vocabularyUri": { + "pattern": "{0}", + "params": [ + "/links/ontology" + ] + }, + "synonyms": { + "pattern": "{0}", + "params": [ + "/synonym" + ], + "indexIn": "keywordValue" + } + } + } +] 
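Taken together, the cvoc-*.json fixtures added here follow the external controlled vocabulary configuration format: each entry binds a metadata field (field-name / term-uri-field) to a vocabulary service, retrieval-uri is a template whose placeholders ({0}, {encodeUrl:keywordTermURL}, {keywordVocabulary}) are filled from the stored term before the service is queried, and retrieval-filtering maps the JSON(-LD) response back onto managed fields. A minimal sketch of the placeholder substitution step only, with a hypothetical helper (CvocUriTemplateSketch is not part of this PR, and whether the real implementation URL-encodes {0} is an assumption):

    // Hypothetical illustration of how a retrieval-uri template could be resolved.
    import java.net.URLEncoder;
    import java.nio.charset.StandardCharsets;

    public class CvocUriTemplateSketch {
        // Replaces {0} verbatim and {encodeUrl:<field>} with the URL-encoded value.
        static String resolve(String template, String fieldName, String termUri) {
            return template
                    .replace("{0}", termUri)
                    .replace("{encodeUrl:" + fieldName + "}",
                             URLEncoder.encode(termUri, StandardCharsets.UTF_8));
        }

        public static void main(String[] args) {
            // ORCID entry above uses: https://pub.orcid.org/v3.0/{0}/person
            System.out.println(resolve("https://pub.orcid.org/v3.0/{0}/person",
                    "creator", "0000-0002-1825-0097"));
            // AgroPortal entry encodes the term URI into the request path;
            // {keywordVocabulary} would be filled from its sibling managed field, left as-is here.
            System.out.println(resolve(
                    "https://data.agroportal.lirmm.fr/ontologies/{keywordVocabulary}/classes/{encodeUrl:keywordTermURL}?language=en,fr",
                    "keywordTermURL", "http://aims.fao.org/aos/agrovoc/c_330834"));
        }
    }

Keeping the substitution purely textual mirrors how the fixtures are written: the brace placeholders name sibling metadata fields, so the same mechanism serves the ORCID, AgroPortal, and Skosmos entries alike.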
diff --git a/src/test/resources/json/cvoc-orcid.json b/src/test/resources/json/cvoc-orcid.json new file mode 100644 index 00000000000..6b904aefc3f --- /dev/null +++ b/src/test/resources/json/cvoc-orcid.json @@ -0,0 +1,43 @@ +[ + { + "field-name": "creator", + "term-uri-field": "creator", + "js-url": "https://gdcc.github.io/dataverse-external-vocab-support/scripts/people.js", + "protocol": "orcid", + "retrieval-uri": "https://pub.orcid.org/v3.0/{0}/person", + "allow-free-text": true, + "prefix": "https://orcid.org/", + "managed-fields": {}, + "languages": "", + "vocabs": { + "orcid": { + "uriSpace": "https://orcid.org/" + } + }, + "retrieval-filtering": { + "@context": { + "personName": "https://schema.org/name", + "scheme": "http://www.w3.org/2004/02/skos/core#inScheme" + }, + "personName": { + "pattern": "{0}, {1}", + "params": [ + "/name/family-name/value", + "/name/given-names/value" + ] + }, + "@id": { + "pattern": "{0}", + "params": [ + "@id" + ] + }, + "scheme": { + "pattern": "ORCID" + }, + "@type": { + "pattern": "https://schema.org/Person" + } + } + } +] diff --git a/src/test/resources/json/cvoc-skosmos.json b/src/test/resources/json/cvoc-skosmos.json new file mode 100644 index 00000000000..6d32b29f054 --- /dev/null +++ b/src/test/resources/json/cvoc-skosmos.json @@ -0,0 +1,69 @@ +[ + { + "field-name": "keyword", + "term-uri-field": "keywordTermURL", + "cvoc-url": "https://demo.skosmos.org/", + "js-url": "https://github.com/gdcc/dataverse-external-vocab-support/blob/main/scripts/skosmos.js", + "protocol": "skosmos", + "retrieval-uri": "https://demo.skosmos.org/rest/v1/data?uri={0}", + "term-parent-uri": "", + "allow-free-text": true, + "languages": "en, fr", + "vocabs": { + "agrovoc": { + "vocabularyUri": "http://aims.fao.org/vest-registry/kos/agrovoc", + "uriSpace": "http://aims.fao.org/aos/agrovoc/" + } + }, + "managed-fields": { + "vocabularyName": "keywordVocabulary", + "termName": "keywordValue", + "vocabularyUri": "keywordVocabularyURI" + }, + "retrieval-filtering": { + "@context": { + "termName": "https://schema.org/name", + "vocabularyName": "https://dataverse.org/schema/vocabularyName", + "vocabularyUri": "https://dataverse.org/schema/vocabularyUri", + "lang": "@language", + "value": "@value" + }, + "@id": { + "pattern": "{0}", + "params": [ + "@id" + ] + }, + "termName": { + "pattern": "{0}", + "params": [ + "/graph/uri=@id/prefLabel" + ] + }, + "vocabularyName": { + "pattern": "{0}", + "params": [ + "/graph/type=skos:ConceptScheme/prefLabel" + ] + }, + "vocabularyUri": { + "pattern": "{0}", + "params": [ + "/graph/type=skos:ConceptScheme/uri" + ] + }, + "synonyms": { + "pattern": "{0}", + "params": [ + "/graph/uri=@id/altLabel" + ] + }, + "genericTerm": { + "pattern": "{0}", + "params": [ + "/graph/type=skos:Concept/prefLabel" + ] + } + } + } +] diff --git a/src/test/resources/json/dataset-finch2.json b/src/test/resources/json/dataset-finch2.json index 4bd6f33eb42..b214eacfa3c 100644 --- a/src/test/resources/json/dataset-finch2.json +++ b/src/test/resources/json/dataset-finch2.json @@ -100,6 +100,12 @@ "typeClass": "primitive", "value": "KeywordTerm1" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -120,6 +126,12 @@ "typeClass": "primitive", "value": "KeywordTerm2" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": 
"http://keywordTermURI2.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, diff --git a/src/test/resources/json/dataset-long-description.json b/src/test/resources/json/dataset-long-description.json index a6e5c291322..4d5478b0f63 100644 --- a/src/test/resources/json/dataset-long-description.json +++ b/src/test/resources/json/dataset-long-description.json @@ -92,6 +92,12 @@ "typeClass": "primitive", "value": "KeywordTerm1" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI1.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, @@ -112,6 +118,12 @@ "typeClass": "primitive", "value": "KeywordTerm2" }, + "keywordTermURI": { + "typeName": "keywordTermURI", + "multiple": false, + "typeClass": "primitive", + "value": "http://keywordTermURI2.org" + }, "keywordVocabulary": { "typeName": "keywordVocabulary", "multiple": false, diff --git a/src/test/resources/mail/cert.pem b/src/test/resources/mail/cert.pem new file mode 100644 index 00000000000..6115183d413 --- /dev/null +++ b/src/test/resources/mail/cert.pem @@ -0,0 +1,24 @@ +-----BEGIN CERTIFICATE----- +MIIEFTCCAv0CFAIjr/AvBVg4EX5/rk5+eFdfsquOMA0GCSqGSIb3DQEBCwUAMIHG +MQswCQYDVQQGEwJEVjEaMBgGA1UECAwRRGF0YXZlcnNlIENvdW50cnkxFzAVBgNV +BAcMDkRhdGF2ZXJzZSBDaXR5MS4wLAYDVQQKDCVHbG9iYWwgRGF0YXZlcnNlIENv +bW11bml0eSBDb25zb3J0aXVtMRswGQYDVQQLDBJUZXN0aW5nIERlcGFydG1lbnQx +FDASBgNVBAMMC2V4YW1wbGUub3JnMR8wHQYJKoZIhvcNAQkBFhB0ZXN0QGV4YW1w +bGUub3JnMB4XDTI0MDIyMDA3MTkxOVoXDTM0MDIxNzA3MTkxOVowgcYxCzAJBgNV +BAYTAkRWMRowGAYDVQQIDBFEYXRhdmVyc2UgQ291bnRyeTEXMBUGA1UEBwwORGF0 +YXZlcnNlIENpdHkxLjAsBgNVBAoMJUdsb2JhbCBEYXRhdmVyc2UgQ29tbXVuaXR5 +IENvbnNvcnRpdW0xGzAZBgNVBAsMElRlc3RpbmcgRGVwYXJ0bWVudDEUMBIGA1UE +AwwLZXhhbXBsZS5vcmcxHzAdBgkqhkiG9w0BCQEWEHRlc3RAZXhhbXBsZS5vcmcw +ggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCzQ55QKM/sVJMb9c5MKtc/ +YW3+MlCrCnGlo42DCjl6noZg8Gji4dOEMo29UcRtYqhOsx7HOXZ5ulj3YKiBfzht ++QV/ZofhMIN9F/N5XCi4MRPorFz+mPck5NDzH1SqYn5zGm5APPqFJlwBWxDKEfqe +6ir5gG91MzHHuJJSQq3nrSDq+/DXRwg/7L2O7da6pBqti7nYU0T5ql88nddkRhR8 +7NdeZndI+UVmkcnal/3ZpybW8ZNzpiP8nCJO3ASz9kXRC3cITS0zgKxl6USDZs+8 +NAM6R0r8icB89L+i8bOfbyU7nkN9T+xUTTOmalSmsYrMIedIBmcB7NuqbXPLEpeJ +AgMBAAEwDQYJKoZIhvcNAQELBQADggEBAA4U/uhswbeJB0gX4vfVqYf30A131Rvu +J4eaVrVLzuByP1R0MvbBCMMYZBlDVDhiFqRh4KdoVWBvTfxf/4McYZ1FhXkgRlOb +mv/mxVBqnXEu5msviApYmoLzMqgd91F3T4CWs66QIWVTJYh2McRKLG0+IfGp3aox +YKC/W2RPsUO2fKFnUDkYetXMuWg1KJYKuqE6u2lcoV3uHFphXplClnlwN+IwtWWY +cgfNBBRpwx6RXTk2XXgpCKYRBthBu1rowp7qiAwX7R5am6wDx0EIbevfR32bDReX +oAV8c9soJWwAUwH63jqq7KTO8Dg1oGHveZMk4HHGkCqZeGCjbDPaak4= +-----END CERTIFICATE----- diff --git a/src/test/resources/mail/key.pem b/src/test/resources/mail/key.pem new file mode 100644 index 00000000000..84d34efdce8 --- /dev/null +++ b/src/test/resources/mail/key.pem @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCzQ55QKM/sVJMb +9c5MKtc/YW3+MlCrCnGlo42DCjl6noZg8Gji4dOEMo29UcRtYqhOsx7HOXZ5ulj3 +YKiBfzht+QV/ZofhMIN9F/N5XCi4MRPorFz+mPck5NDzH1SqYn5zGm5APPqFJlwB +WxDKEfqe6ir5gG91MzHHuJJSQq3nrSDq+/DXRwg/7L2O7da6pBqti7nYU0T5ql88 +nddkRhR87NdeZndI+UVmkcnal/3ZpybW8ZNzpiP8nCJO3ASz9kXRC3cITS0zgKxl +6USDZs+8NAM6R0r8icB89L+i8bOfbyU7nkN9T+xUTTOmalSmsYrMIedIBmcB7Nuq +bXPLEpeJAgMBAAECggEAQ3h3TQ9XVslsRxFIsLVNJ49JoWuZng7DwIai3AfMo4Cn +7jN+HqrFfBO08mUkq9D+rQRQ2MYhd+Zx1sXcFkVmXUnlTlKuYMzsKHiLzIkp0E20 +gxXguHilSI8Qr/kCWlDQ7AyuI2JwHg5WgbIfSxbiP86+FwNGsBNxMI0hEXIEV1ZY 
+OFXO6AWO63D4zwbwMT30k8cjfyjGvjEtoGmjnBJcrJLSADCIWLcFCw+Cm8vcRkCd +BEpfRzeEos/NVdOqCpi1ea3OkGAY94mXxz6gaFRbeJFj9b6st7oVZLBOiMx1eafH +hgB9JkfVtDogl9B13MkqRN8WAiOgAjIo2Ukq8x1ZkwKBgQD88sdh8k1eldO9UXG1 +BjEsB2mEnzp1hvjuRlMQtnvOjDakbqozzbNQlq9YJxocphLyUPM/BKTsIGp0SPpd +vo0lgspDJ5eLnHd/Xf/guYvKg90NsHZR6V7hf9Z4JcrwrwvXpf7Lp/m95Jwd930j +/kPXw25gRFmpJ8Q9ciIk0PF0NwKBgQC1bUTK8iarZHhDGnR+/AhjkfSnb0z725Qb +w7MYRvicRNWT0wnk3njMMfXYS0rbxw7O5LlSoyCf+n6dGtHqJWCS1+lYuCjCz1vr +hMVFbpcEhob0OAhg8YMgzQRsmeJcBm8slVEOrmmVhQQZPRBjAaQw2f6cjW/ZhzZd +JHSiDw3yPwKBgQDLSleB2Zni3al56v3mzh4w05gzVUFHeX2RCoXx1ad1He1AhAxY +bAakSyaLQ4nR4osxomuMhzAA8iB8araFJwMLVa03AZfjRZIolCR0uMqnrQi42syN +EnEF7JcyorUScKyk2S0JAmxN+HCcCO7TQaPGwbNwvR4OO/6Un6jfS+nySwKBgH6n +4bashkJwyWRPO7TKzjB03I9nLB9Hk4YugQEZysWNaGzij62vgjVLS43MQl5cAQJ+ +usHuEACfJ3UWHCWSInFhOg4twob9q/YnonBuXA9UuzITTAYhlKF5fvUyGMyV0VcW +hpfxOtSfH9Vew+naY32XMiCovMTnmBQ+Nw5L5DiRAoGAV5/JT4z57Y+8npBCRr1m +NJZBXjQ8rmjYBCs+jOQ48wK2mEgcgARIgVGgi9MZZ2BUFHPThGS1o4OYE+fdqD95 +bvg1XInVpNwebLP6UZa9xZ8oGd3Auxfsav1WJB+CZo2tOX5Qt+GnwiumEr3Dlf1d +UVXDNM5A/sl1IDL3T3IEdSw= +-----END PRIVATE KEY----- diff --git a/tests/integration-tests.txt b/tests/integration-tests.txt index 18911b3164a..44bbfdcceb7 100644 --- a/tests/integration-tests.txt +++ b/tests/integration-tests.txt @@ -1 +1 @@ -DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT +DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,DataRetrieverApiIT,ProvIT,S3AccessIT,OpenApiIT,InfoIT diff --git a/tests/shell/spec/update_fields_spec.sh b/tests/shell/spec/update_fields_spec.sh index 48054a121b7..fa61743bf6f 100644 --- a/tests/shell/spec/update_fields_spec.sh +++ b/tests/shell/spec/update_fields_spec.sh @@ -1,16 +1,17 @@ #shellcheck shell=sh +#shellcheck disable=SC2154 update_fields() { - ../../conf/solr/9.3.0/update-fields.sh "$@" + ../../conf/solr/update-fields.sh "$@" } Describe "Update fields command" Describe "can operate on upstream data" - copyUpstreamSchema() { cp ../../conf/solr/9.3.0/schema.xml data/solr/upstream-schema.xml; } + copyUpstreamSchema() { cp ../../conf/solr/schema.xml data/solr/upstream-schema.xml; } AfterAll 'copyUpstreamSchema' - Path schema-xml="../../conf/solr/9.3.0/schema.xml" + Path schema-xml="../../conf/solr/schema.xml" It "needs upstream schema.xml" The path schema-xml should be exist End @@ -115,16 +116,16 @@ Describe "Update fields command" End Describe "succeeds because" - setup() { cp data/solr/minimal-schema.xml data/solr/minimal-schema-work.xml; } - cleanup() { rm data/solr/minimal-schema-work.xml; } - BeforeEach 'setup' - AfterEach 'cleanup' + setup1() { cp data/solr/minimal-schema.xml data/solr/minimal-schema-work.xml; } + cleanup1() { rm data/solr/minimal-schema-work.xml; } + BeforeEach 'setup1' + AfterEach 
'cleanup1' deleteUpstreamSchema() { rm data/solr/upstream-schema.xml; } AfterAll 'deleteUpstreamSchema' - match_content() { - grep -q "$@" "${match_content}" + match_content1() { + grep -q "$@" "${match_content1}" } It "prints nothing when editing minimal schema" @@ -133,8 +134,8 @@ Describe "Update fields command" The status should equal 0 The output should equal "" The path data/solr/minimal-schema-work.xml should be file - The path data/solr/minimal-schema-work.xml should satisfy match_content "