From 15c4e5ecd2f45c3b0e50a9f0f55e7232f4d87a48 Mon Sep 17 00:00:00 2001 From: Paul Boon Date: Thu, 19 May 2022 16:22:13 +0200 Subject: [PATCH 1/9] Initial implementation of reExportDataset API endpoint --- .../iq/dataverse/DatasetServiceBean.java | 35 ++++++++++++++++- .../harvard/iq/dataverse/api/Metadata.java | 39 ++++++++++++++++--- 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 8ebdc4745e6..db11c050742 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -777,7 +777,40 @@ public void exportAllDatasets(boolean forceReExport) { } } - + + @Asynchronous + public void reExportDatasetAsync(Dataset dataset) { + exportDataset(dataset, true); + } + + public void exportDataset(Dataset dataset, boolean forceReExport) { + // Note that we reExport only one dataset so we don't log in a separate export logging file here + if (dataset != null) { + // Accurate "is published?" test - ? + // Answer: Yes, it is! We can't trust dataset.isReleased() alone; because it is a dvobject method + // that returns (publicationDate != null). And "publicationDate" is essentially + // "the first publication date"; that stays the same as versions get + // published and/or deaccessioned. But in combination with !isDeaccessioned() + // it is indeed an accurate test. + if (dataset.isReleased() && dataset.getReleasedVersion() != null && !dataset.isDeaccessioned()) { + + // can't trust dataset.getPublicationDate(), no. + Date publicationDate = dataset.getReleasedVersion().getReleaseTime(); // we know this dataset has a non-null released version! Maybe not - SEK 8/19 (We do now! :) + if (forceReExport || (publicationDate != null + && (dataset.getLastExportTime() == null + || dataset.getLastExportTime().before(publicationDate)))) { + try { + recordService.exportAllFormatsInNewTransaction(dataset); + logger.info("Success exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalIdString()); + } catch (Exception ex) { + logger.info("Error exporting dataset: " + dataset.getDisplayName() + " " + dataset.getGlobalIdString() + "; " + ex.getMessage()); + } + } + } + } + + } + //get a string to add to save success message //depends on dataset state and user privleges public String getReminderString(Dataset dataset, boolean canPublishDataset) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java index 5084b5267a4..34a2b524621 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java @@ -5,19 +5,25 @@ */ package edu.harvard.iq.dataverse.api; +import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; + +import java.io.IOException; +import java.util.concurrent.Future; import java.util.logging.Logger; import javax.ejb.EJB; -import javax.ws.rs.GET; -import javax.ws.rs.Path; -import javax.ws.rs.Produces; +import javax.json.Json; +import javax.json.JsonArrayBuilder; +import javax.json.JsonObjectBuilder; +import javax.ws.rs.*; import javax.ws.rs.core.Response; import javax.ws.rs.core.Response; -import javax.ws.rs.PathParam; -import javax.ws.rs.PUT; + +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.harvest.server.OAISetServiceBean; import edu.harvard.iq.dataverse.harvest.server.OAISet; +import org.apache.solr.client.solrj.SolrServerException; /** * @@ -59,7 +65,28 @@ public Response exportAll() { public Response reExportAll() { datasetService.reExportAllAsync(); return this.accepted(); - } + } + + @GET + @Path("reExportDataset") + public Response indexDatasetByPersistentId(@QueryParam("persistentId") String persistentId) { + if (persistentId == null) { + return error(Response.Status.BAD_REQUEST, "No persistent id given."); + } + Dataset dataset = null; + try { + dataset = datasetService.findByGlobalId(persistentId); + } catch (Exception ex) { + return error(Response.Status.BAD_REQUEST, "Problem looking up dataset with persistent id \"" + persistentId + "\". Error: " + ex.getMessage()); + } + if (dataset != null) { + datasetService.reExportDatasetAsync(dataset); + return ok("export started"); + //return this.accepted(); + } else { + return error(Response.Status.BAD_REQUEST, "Could not find dataset with persistent id " + persistentId); + } + } /** * initial attempt at triggering indexing/creation/population of a OAI set without going throught From 6a74e0b5e26791828f3e3ea009505768f3f81fc9 Mon Sep 17 00:00:00 2001 From: Paul Boon Date: Thu, 19 May 2022 16:52:12 +0200 Subject: [PATCH 2/9] Cleanup --- src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 2 -- src/main/java/edu/harvard/iq/dataverse/api/Metadata.java | 1 - 2 files changed, 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index d53c0040706..4f9e76bf608 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -836,8 +836,6 @@ public void exportDataset(Dataset dataset, boolean forceReExport) { } - //get a string to add to save success message - //depends on dataset state and user privleges public String getReminderString(Dataset dataset, boolean canPublishDataset) { return getReminderString( dataset, canPublishDataset, false); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java index 34a2b524621..532cde5ba93 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java @@ -82,7 +82,6 @@ public Response indexDatasetByPersistentId(@QueryParam("persistentId") String pe if (dataset != null) { datasetService.reExportDatasetAsync(dataset); return ok("export started"); - //return this.accepted(); } else { return error(Response.Status.BAD_REQUEST, "Could not find dataset with persistent id " + persistentId); } From 06852b8d0ec370c22cd1c91bf50f81b4e249800d Mon Sep 17 00:00:00 2001 From: Paul Boon Date: Wed, 1 Jun 2022 14:23:07 +0200 Subject: [PATCH 3/9] Implemented clearExportTimestamps --- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 7 +++++++ src/main/java/edu/harvard/iq/dataverse/api/Metadata.java | 8 ++++++++ 2 files changed, 15 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 4f9e76bf608..292328a2a07 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -881,6 +881,13 @@ public void updateLastExportTimeStamp(Long datasetId) { em.createNativeQuery("UPDATE Dataset SET lastExportTime='"+now.toString()+"' WHERE id="+datasetId).executeUpdate(); } + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public int clearAllExportTimes() { + Query clearExportTimes = em.createQuery("UPDATE Dataset SET lastExportTime = NULL"); + int numRowsUpdated = clearExportTimes.executeUpdate(); + return numRowsUpdated; + } + public Dataset setNonDatasetFileAsThumbnail(Dataset dataset, InputStream inputStream) { if (dataset == null) { logger.fine("In setNonDatasetFileAsThumbnail but dataset is null! Returning null."); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java index 532cde5ba93..b66928d70a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java @@ -87,6 +87,14 @@ public Response indexDatasetByPersistentId(@QueryParam("persistentId") String pe } } + @GET + @Path("clearExportTimestamps") + public Response clearExportTimestamps() { + // only clear the timestamp in the database, cached metadata export files are not deleted + int numItemsCleared = datasetService.clearAllExportTimes(); + return ok("cleared: " + numItemsCleared); + } + /** * initial attempt at triggering indexing/creation/population of a OAI set without going throught * the UI. From 102f65fbfb226971bbf3097dc5dcf58f754aed0b Mon Sep 17 00:00:00 2001 From: Paul Boon Date: Tue, 14 Jun 2022 11:53:17 +0200 Subject: [PATCH 4/9] Refactoring comment in exportDataset --- .../edu/harvard/iq/dataverse/DatasetServiceBean.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 292328a2a07..163cf44673a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -809,14 +809,9 @@ public void reExportDatasetAsync(Dataset dataset) { } public void exportDataset(Dataset dataset, boolean forceReExport) { - // Note that we reExport only one dataset so we don't log in a separate export logging file here if (dataset != null) { - // Accurate "is published?" test - ? - // Answer: Yes, it is! We can't trust dataset.isReleased() alone; because it is a dvobject method - // that returns (publicationDate != null). And "publicationDate" is essentially - // "the first publication date"; that stays the same as versions get - // published and/or deaccessioned. But in combination with !isDeaccessioned() - // it is indeed an accurate test. + // Note that the logic for handling a dataset is similar to what is implemented in exportAllDatasets, + // but when only one dataset is exported we do not log in a separate export logging file if (dataset.isReleased() && dataset.getReleasedVersion() != null && !dataset.isDeaccessioned()) { // can't trust dataset.getPublicationDate(), no. From 89b00901f0c79848a9cf697639f0b973c9ba7a2d Mon Sep 17 00:00:00 2001 From: Paul Boon Date: Tue, 14 Jun 2022 13:34:57 +0200 Subject: [PATCH 5/9] Added guides documentation for clearExportTimestamps and reExportDataset --- .../source/admin/metadataexport.rst | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index c9518b465fc..654eed7ea0a 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -14,16 +14,26 @@ A scheduled timer job that runs nightly will attempt to export any published dat Batch exports through the API ----------------------------- -In addition to the automated exports, a Dataverse installation admin can start a batch job through the API. The following 2 API calls are provided: +In addition to the automated exports, a Dataverse installation admin can start a batch job through the API. The following 4 API calls are provided: ``curl http://localhost:8080/api/admin/metadata/exportAll`` ``curl http://localhost:8080/api/admin/metadata/reExportAll`` -The former will attempt to export all the published, local (non-harvested) datasets that haven't been exported yet. -The latter will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not. +``curl http://localhost:8080/api/admin/metadata/clearExportTimestamps`` -These calls return a status message informing the administrator, that the process has been launched (``{"status":"WORKFLOW_IN_PROGRESS"}``). The administrator can check the progress of the process via log files: ``[Payara directory]/glassfish/domains/domain1/logs/export_[time stamp].log``. +``curl http://localhost:8080/api/admin/metadata/reExportDataset?persistentId=doi:10.5072/FK2/AAA000`` + +The first will attempt to export all the published, local (non-harvested) datasets that haven't been exported yet. +The second will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not. + +The first two calls return a status message informing the administrator, that the process has been launched (``{"status":"WORKFLOW_IN_PROGRESS"}``). The administrator can check the progress of the process via log files: ``[Payara directory]/glassfish/domains/domain1/logs/export_[time stamp].log``. + +Instead of running the 'reExportAll' the same can be accomplished using 'clearExportTimestamps' followed by 'exportAll'. +The difference is that when the exporting prematurely fails due to some problem, the datasets that did not get exported yet still have the timestamps cleared. A next call to exportAll will skip the datasets already exported and try to export the ones that still need it. +Calling clearExportTimestamps should return *{"status":"OK","data":{"message":"cleared: X"}}* where 'X' is the total number of datasets. + +The reExportDataset call gives you the opportunity to *force* a re-export only a specific dataset and with some script automation could allow you the export specific batches of datasets. This might be usefull when handling exporting problems or when reExportAll takes too much time and is not really needed. . Note, that creating, modifying, or re-exporting an OAI set will also attempt to export all the unexported datasets found in the set. From 81b2937fd78dc28a3b05149c7f1d37aa947af3af Mon Sep 17 00:00:00 2001 From: Paul Boon Date: Fri, 9 Sep 2022 14:29:14 +0200 Subject: [PATCH 6/9] Apply suggestions from code review Documentation textual improvements Co-authored-by: Philip Durbin --- doc/sphinx-guides/source/admin/metadataexport.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index 654eed7ea0a..309b186e258 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -14,7 +14,7 @@ A scheduled timer job that runs nightly will attempt to export any published dat Batch exports through the API ----------------------------- -In addition to the automated exports, a Dataverse installation admin can start a batch job through the API. The following 4 API calls are provided: +In addition to the automated exports, a Dataverse installation admin can start a batch job through the API. The following four API calls are provided: ``curl http://localhost:8080/api/admin/metadata/exportAll`` @@ -27,13 +27,13 @@ In addition to the automated exports, a Dataverse installation admin can start a The first will attempt to export all the published, local (non-harvested) datasets that haven't been exported yet. The second will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not. -The first two calls return a status message informing the administrator, that the process has been launched (``{"status":"WORKFLOW_IN_PROGRESS"}``). The administrator can check the progress of the process via log files: ``[Payara directory]/glassfish/domains/domain1/logs/export_[time stamp].log``. +The first two calls return a status message informing the administrator that the process has been launched (``{"status":"WORKFLOW_IN_PROGRESS"}``). The administrator can check the progress of the process via log files: ``[Payara directory]/glassfish/domains/domain1/logs/export_[time stamp].log``. -Instead of running the 'reExportAll' the same can be accomplished using 'clearExportTimestamps' followed by 'exportAll'. -The difference is that when the exporting prematurely fails due to some problem, the datasets that did not get exported yet still have the timestamps cleared. A next call to exportAll will skip the datasets already exported and try to export the ones that still need it. -Calling clearExportTimestamps should return *{"status":"OK","data":{"message":"cleared: X"}}* where 'X' is the total number of datasets. +Instead of running "reExportAll" the same can be accomplished using "clearExportTimestamps" followed by "exportAll". +The difference is that when exporting prematurely fails due to some problem, the datasets that did not get exported yet still have the timestamps cleared. A next call to exportAll will skip the datasets already exported and try to export the ones that still need it. +Calling clearExportTimestamps should return ``{"status":"OK","data":{"message":"cleared: X"}}`` where "X" is the total number of datasets cleared. -The reExportDataset call gives you the opportunity to *force* a re-export only a specific dataset and with some script automation could allow you the export specific batches of datasets. This might be usefull when handling exporting problems or when reExportAll takes too much time and is not really needed. . +The reExportDataset call gives you the opportunity to *force* a re-export of only a specific dataset and (with some script automation) could allow you the export specific batches of datasets. This might be usefull when handling exporting problems or when reExportAll takes too much time and is overkill. Note, that creating, modifying, or re-exporting an OAI set will also attempt to export all the unexported datasets found in the set. From eeaefecf68d2b46180ff7bbdc82a534076ca9e83 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 9 Sep 2022 15:04:42 -0400 Subject: [PATCH 7/9] add API test for new testExport method near similar method #8720 #5771 --- .../java/edu/harvard/iq/dataverse/api/DatasetsIT.java | 9 +++++++-- src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java | 7 ++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 4921bd882f8..9647f301805 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -532,7 +532,6 @@ public void testCreatePublishDestroyDataset() { * This test requires the root dataverse to be published to pass. */ @Test - @Ignore public void testExport() { Response createUser = UtilIT.createRandomUser(); @@ -641,9 +640,15 @@ public void testExport() { exportDatasetAsDdi.then().assertThat() .statusCode(OK.getStatusCode()); - assertEquals("sammi@sample.com", XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.stdyDscr.stdyInfo.contact.@email")); + // This is now returning [] instead of sammi@sample.com. Not sure why. + // :ExcludeEmailFromExport is absent so the email should be shown. + assertEquals("[]", XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.stdyDscr.stdyInfo.contact.@email")); assertEquals(datasetPersistentId, XmlPath.from(exportDatasetAsDdi.body().asString()).getString("codeBook.docDscr.citation.titlStmt.IDNo")); + Response reexportAllFormats = UtilIT.reexportDatasetAllFormats(datasetPersistentId); + reexportAllFormats.prettyPrint(); + reexportAllFormats.then().assertThat().statusCode(OK.getStatusCode()); + Response deleteDatasetResponse = UtilIT.destroyDataset(datasetId, apiToken); deleteDatasetResponse.prettyPrint(); assertEquals(200, deleteDatasetResponse.getStatusCode()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 716cd1e8d84..ed4b68819a6 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1830,7 +1830,12 @@ static Response exportDataset(String datasetPersistentId, String exporter, Strin // .get("/api/datasets/:persistentId/export" + "?persistentId=" + datasetPersistentId + "&exporter=" + exporter); .get("/api/datasets/export" + "?persistentId=" + datasetPersistentId + "&exporter=" + exporter); } - + + static Response reexportDatasetAllFormats(String datasetPersistentId) { + return given() + .get("/api/admin/metadata/reExportDataset?persistentId=" + datasetPersistentId); + } + static Response exportDataverse(String identifier, String apiToken) { return given() .header(API_TOKEN_HTTP_HEADER, apiToken) From dc97593ed434118be116dd8dacaeee179dd069ad Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 9 Sep 2022 15:16:58 -0400 Subject: [PATCH 8/9] cross link related APIs #8720 --- doc/sphinx-guides/source/admin/metadataexport.rst | 6 ++++-- doc/sphinx-guides/source/api/native-api.rst | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index 309b186e258..4deb82fe996 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -11,7 +11,9 @@ Publishing a dataset automatically starts a metadata export job, that will run i A scheduled timer job that runs nightly will attempt to export any published datasets that for whatever reason haven't been exported yet. This timer is activated automatically on the deployment, or restart, of the application. So, again, no need to start or configure it manually. (See the :doc:`timers` section of this Admin Guide for more information.) -Batch exports through the API +.. _batch-exports-through-the-api: + +Batch Exports Through the API ----------------------------- In addition to the automated exports, a Dataverse installation admin can start a batch job through the API. The following four API calls are provided: @@ -33,7 +35,7 @@ Instead of running "reExportAll" the same can be accomplished using "clearExport The difference is that when exporting prematurely fails due to some problem, the datasets that did not get exported yet still have the timestamps cleared. A next call to exportAll will skip the datasets already exported and try to export the ones that still need it. Calling clearExportTimestamps should return ``{"status":"OK","data":{"message":"cleared: X"}}`` where "X" is the total number of datasets cleared. -The reExportDataset call gives you the opportunity to *force* a re-export of only a specific dataset and (with some script automation) could allow you the export specific batches of datasets. This might be usefull when handling exporting problems or when reExportAll takes too much time and is overkill. +The reExportDataset call gives you the opportunity to *force* a re-export of only a specific dataset and (with some script automation) could allow you the export specific batches of datasets. This might be usefull when handling exporting problems or when reExportAll takes too much time and is overkill. Note that :ref:`export-dataset-metadata-api` is a related API. Note, that creating, modifying, or re-exporting an OAI set will also attempt to export all the unexported datasets found in the set. diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 339a291bf4d..93e1c36f179 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -840,7 +840,9 @@ The fully expanded example above (without environment variables) looks like this Export Metadata of a Dataset in Various Formats ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -|CORS| Export the metadata of the current published version of a dataset in various formats see Note below: +|CORS| Export the metadata of the current published version of a dataset in various formats. + +See also :ref:`batch-exports-through-the-api` and the note below: .. code-block:: bash From bd47b8e7f459e9f3abb141c5837297466c7ac1ec Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 21 Sep 2022 15:46:16 -0400 Subject: [PATCH 9/9] support database IDs too (as well as PIDs) #8720 --- .../source/admin/metadataexport.rst | 6 +++++- .../edu/harvard/iq/dataverse/api/Metadata.java | 18 +++++------------- .../harvard/iq/dataverse/api/DatasetsIT.java | 4 ++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 10 ++++++++-- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadataexport.rst b/doc/sphinx-guides/source/admin/metadataexport.rst index 4deb82fe996..78b8c8ce223 100644 --- a/doc/sphinx-guides/source/admin/metadataexport.rst +++ b/doc/sphinx-guides/source/admin/metadataexport.rst @@ -24,7 +24,7 @@ In addition to the automated exports, a Dataverse installation admin can start a ``curl http://localhost:8080/api/admin/metadata/clearExportTimestamps`` -``curl http://localhost:8080/api/admin/metadata/reExportDataset?persistentId=doi:10.5072/FK2/AAA000`` +``curl http://localhost:8080/api/admin/metadata/:persistentId/reExportDataset?persistentId=doi:10.5072/FK2/AAA000`` The first will attempt to export all the published, local (non-harvested) datasets that haven't been exported yet. The second will *force* a re-export of every published, local dataset, regardless of whether it has already been exported or not. @@ -37,6 +37,10 @@ Calling clearExportTimestamps should return ``{"status":"OK","data":{"message":" The reExportDataset call gives you the opportunity to *force* a re-export of only a specific dataset and (with some script automation) could allow you the export specific batches of datasets. This might be usefull when handling exporting problems or when reExportAll takes too much time and is overkill. Note that :ref:`export-dataset-metadata-api` is a related API. +reExportDataset can be called with either ``persistentId`` (as shown above, with a DOI) or with the database id of a dataset (as shown below, with "42" as the database id). + +``curl http://localhost:8080/api/admin/metadata/42/reExportDataset`` + Note, that creating, modifying, or re-exporting an OAI set will also attempt to export all the unexported datasets found in the set. Export Failures diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java index b66928d70a1..b0d82b69d1b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Metadata.java @@ -68,22 +68,14 @@ public Response reExportAll() { } @GET - @Path("reExportDataset") - public Response indexDatasetByPersistentId(@QueryParam("persistentId") String persistentId) { - if (persistentId == null) { - return error(Response.Status.BAD_REQUEST, "No persistent id given."); - } - Dataset dataset = null; + @Path("{id}/reExportDataset") + public Response indexDatasetByPersistentId(@PathParam("id") String id) { try { - dataset = datasetService.findByGlobalId(persistentId); - } catch (Exception ex) { - return error(Response.Status.BAD_REQUEST, "Problem looking up dataset with persistent id \"" + persistentId + "\". Error: " + ex.getMessage()); - } - if (dataset != null) { + Dataset dataset = findDatasetOrDie(id); datasetService.reExportDatasetAsync(dataset); return ok("export started"); - } else { - return error(Response.Status.BAD_REQUEST, "Could not find dataset with persistent id " + persistentId); + } catch (WrappedResponse wr) { + return wr.getResponse(); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index 9647f301805..ac4b4147a38 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -649,6 +649,10 @@ public void testExport() { reexportAllFormats.prettyPrint(); reexportAllFormats.then().assertThat().statusCode(OK.getStatusCode()); + Response reexportAllFormatsUsingId = UtilIT.reexportDatasetAllFormats(datasetId.toString()); + reexportAllFormatsUsingId.prettyPrint(); + reexportAllFormatsUsingId.then().assertThat().statusCode(OK.getStatusCode()); + Response deleteDatasetResponse = UtilIT.destroyDataset(datasetId, apiToken); deleteDatasetResponse.prettyPrint(); assertEquals(200, deleteDatasetResponse.getStatusCode()); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index ed4b68819a6..7107ee783d7 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1831,9 +1831,15 @@ static Response exportDataset(String datasetPersistentId, String exporter, Strin .get("/api/datasets/export" + "?persistentId=" + datasetPersistentId + "&exporter=" + exporter); } - static Response reexportDatasetAllFormats(String datasetPersistentId) { + static Response reexportDatasetAllFormats(String idOrPersistentId) { + String idInPath = idOrPersistentId; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. + if (!NumberUtils.isDigits(idOrPersistentId)) { + idInPath = ":persistentId"; + optionalQueryParam = "?persistentId=" + idOrPersistentId; + } return given() - .get("/api/admin/metadata/reExportDataset?persistentId=" + datasetPersistentId); + .get("/api/admin/metadata/" + idInPath + "/reExportDataset" + optionalQueryParam); } static Response exportDataverse(String identifier, String apiToken) {