Skip to content

Commit

Permalink
Added an API for registering within a collection. More doc changes. #…
Browse files Browse the repository at this point in the history
  • Loading branch information
landreev committed Jun 21, 2023
1 parent d902fed commit 4f1afd3
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 7 deletions.
27 changes: 22 additions & 5 deletions doc/sphinx-guides/source/admin/dataverses-datasets.rst
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Managing Datasets and Dataverse Collections
Managing Datasets and Dataverse Collections
===========================================

.. contents:: |toctitle|
Expand Down Expand Up @@ -153,15 +153,32 @@ Mint a PID for a File That Does Not Have One
In the following example, the database id of the file is 42::

export FILE_ID=42
curl http://localhost:8080/api/admin/$FILE_ID/registerDataFile
curl "http://localhost:8080/api/admin/$FILE_ID/registerDataFile"

Mint PIDs for Files That Do Not Have Them
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Mint PIDs for all unregistered published files in the specified collection
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

If you have a large number of files, you might want to consider minting PIDs for files individually using the ``registerDataFile`` endpoint above in a for loop, sleeping between each registration::
The following API will register the PIDs for all the yet unregistered published files in the datasets **directly within the collection** specified by its alias::

curl "http://localhost:8080/api/admin/registerDataFiles/{collection_alias}"

It will not attempt to register the datafiles in its sub-collections, so this call will need to be repeated on any sub-collections where files need to be registered as well. File-level PID registration must be enabled on the collection. (Note that it is possible to have it enabled for a specific collection, even when it is disabled for the Dataverse installation as a whole. See :ref:`collection-attributes-api` in the Native API Guide.)

This API will sleep for 1 second between registration calls by default. A longer sleep interval can be specified with an optional ``sleep=`` parameter::

curl "http://localhost:8080/api/admin/registerDataFiles/{collection_alias}?sleep=5"

Mint PIDs for ALL unregistered files in the database
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The following API will attempt to register the PIDs for all the published files in your instance that do not yet have them::

curl http://localhost:8080/api/admin/registerDataFileAll

The application will attempt to sleep for 1 second between registration attempts so as not to overload your persistent identifier service provider. Note that if you have a large number of files that need to be registered in your Dataverse installation, you may want to consider minting file PIDs within individual collections, or even for individual files, using the ``registerDataFiles`` and/or ``registerDataFile`` endpoints above in a loop, with a longer sleep interval between calls.



Mint a New DOI for a Dataset with a Handle
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
2 changes: 1 addition & 1 deletion doc/sphinx-guides/source/api/native-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ Change Collection Attributes

.. code-block::
curl -H X-Dataverse-key:$API_TOKEN $SERVER_URL/api/dataverses/$ID/attribute/$ATTRIBUTE?value=$VALUE
curl -X PUT -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/attribute/$ATTRIBUTE?value=$VALUE"
The following attributes are supported:

Expand Down
12 changes: 12 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DataFileServiceBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,18 @@ public List<DataFile> findByDatasetId(Long studyId) {
.setParameter("studyId", studyId).getResultList();
}

/**
 * Finds the datafiles that belong to datasets owned directly by the given collection.
 * Files in datasets that live in sub-collections of that collection are not matched
 * by the query, since it only joins on the dataset's immediate owner.
 *
 * @param collectionId numeric id of the parent collection ("dataverse")
 * @return the matching datafiles, ordered by datafile id
 */
public List<DataFile> findByDirectCollectionOwner(Long collectionId) {
    return em
            .createQuery(
                    "select f from DataFile f, Dataset d where f.owner.id = d.id and d.owner.id = :collectionId order by f.id",
                    DataFile.class)
            .setParameter("collectionId", collectionId)
            .getResultList();
}

public List<DataFile> findAllRelatedByRootDatafileId(Long datafileId) {
/*
Get all files with the same root datafile id
Expand Down
90 changes: 89 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/api/Admin.java
Original file line number Diff line number Diff line change
Expand Up @@ -1376,7 +1376,7 @@ public Response fixMissingOriginalTypes() {
"All the tabular files in the database already have the original types set correctly; exiting.");
} else {
for (Long fileid : affectedFileIds) {
logger.info("found file id: " + fileid);
logger.fine("found file id: " + fileid);
}
info.add("message", "Found " + affectedFileIds.size()
+ " tabular files with missing original types. Kicking off an async job that will repair the files in the background.");
Expand Down Expand Up @@ -1566,6 +1566,12 @@ public Response registerDataFileAll(@Context ContainerRequestContext crc) {
} catch (Exception e) {
logger.info("Unexpected Exception: " + e.getMessage());
}

try {
Thread.sleep(1000);
} catch (InterruptedException ie) {
logger.warning("Interrupted Exception when attempting to execute Thread.sleep()!");
}
}
logger.info("Final Results:");
logger.info(alreadyRegistered + " of " + count + " files were already registered. " + new Date());
Expand All @@ -1577,6 +1583,88 @@ public Response registerDataFileAll(@Context ContainerRequestContext crc) {
return ok("Datafile registration complete." + successes + " of " + released
+ " unregistered, published files registered successfully.");
}

/**
 * Registers persistent identifiers for the unregistered, published datafiles in the
 * datasets that are direct children of the specified collection.
 *
 * Files that already have an identifier, and files that are not yet released, are
 * counted but otherwise left alone. The method pauses after each registration attempt
 * (successful or not); no pause is made for files that were merely skipped, since no
 * registration call was issued for them.
 *
 * @param crc           request context supplied by the container
 * @param alias         alias of the collection whose files should be registered
 * @param sleepInterval optional pause, in whole seconds, between registration attempts;
 *                      defaults to 1 when omitted and must be at least 1 otherwise
 * @return an OK response summarizing how many files were registered; the lookup error
 *         response if the collection cannot be found; a 500 response if no superuser
 *         is available; or a 400 response if the sleep interval is below 1
 */
@GET
@AuthRequired
@Path("/registerDataFiles/{alias}")
public Response registerDataFilesInCollection(@Context ContainerRequestContext crc, @PathParam("alias") String alias, @QueryParam("sleep") Integer sleepInterval) {
    Dataverse collection;
    try {
        collection = findDataverseOrDie(alias);
    } catch (WrappedResponse r) {
        return r.getResponse();
    }

    AuthenticatedUser superuser = authSvc.getAdminUser();
    if (superuser == null) {
        return error(Response.Status.INTERNAL_SERVER_ERROR, "Cannot find the superuser to execute /admin/registerDataFiles.");
    }

    if (!systemConfig.isFilePIDsEnabledForCollection(collection)) {
        return ok("Registration of file-level pid is disabled in collection "+alias+"; nothing to do");
    }

    // Validate the request parameter before running the (potentially large) file query.
    if (sleepInterval == null) {
        sleepInterval = 1;
    } else if (sleepInterval < 1) {
        return error(Response.Status.BAD_REQUEST, "Invalid sleep interval: "+sleepInterval);
    }
    // Long arithmetic: an int multiplication would overflow for very large intervals
    // and hand a negative value to Thread.sleep().
    final long sleepMillis = sleepInterval * 1000L;

    List<DataFile> dataFiles = fileService.findByDirectCollectionOwner(collection.getId());
    int count = dataFiles.size();
    int countSuccesses = 0;
    int countAlreadyRegistered = 0;
    int countReleased = 0;
    int countDrafts = 0;
    boolean interrupted = false;

    logger.info("Starting to register: analyzing " + count + " files. " + new Date());
    logger.info("Only unregistered, published files will be registered.");

    for (DataFile df : dataFiles) {
        // Set only when a registration call is actually made for this file.
        boolean registrationAttempted = false;
        try {
            if ((df.getIdentifier() == null || df.getIdentifier().isEmpty())) {
                if (df.isReleased()) {
                    // Counted once here, whether or not the registration succeeds.
                    countReleased++;
                    registrationAttempted = true;
                    DataverseRequest r = createDataverseRequest(superuser);
                    execCommand(new RegisterDvObjectCommand(r, df));
                    countSuccesses++;
                    if (countSuccesses % 100 == 0) {
                        logger.info(countSuccesses + " out of " + count + " files registered successfully. " + new Date());
                    }
                } else {
                    countDrafts++;
                    logger.fine(countDrafts + " out of " + count + " files not yet published");
                }
            } else {
                countAlreadyRegistered++;
                logger.fine(countAlreadyRegistered + " out of " + count + " files are already registered. " + new Date());
            }
        } catch (WrappedResponse ex) {
            // The file was already added to countReleased above; incrementing again
            // here would count every failed registration twice.
            logger.log(Level.SEVERE, "Failed to register file id: " + df.getId(), ex);
        } catch (Exception e) {
            logger.log(Level.WARNING, "Unexpected Exception while processing file id: " + df.getId(), e);
        }

        // Throttle only after a real registration attempt; skipped files cost the
        // PID provider nothing and need no pause.
        if (registrationAttempted) {
            try {
                Thread.sleep(sleepMillis);
            } catch (InterruptedException ie) {
                logger.warning("Interrupted Exception when attempting to execute Thread.sleep()! Stopping the registration run.");
                // Restore the flag so the caller/container can observe the interruption.
                Thread.currentThread().interrupt();
                interrupted = true;
                break;
            }
        }
    }

    logger.info(countAlreadyRegistered + " out of " + count + " files were already registered. " + new Date());
    logger.info(countDrafts + " out of " + count + " files are not yet published. " + new Date());
    logger.info(countReleased + " out of " + count + " unregistered, published files to register. " + new Date());
    logger.info(countSuccesses + " out of " + countReleased + " unregistered, published files registered successfully. "
            + new Date());

    String outcome = interrupted ? "Datafile registration interrupted. " : "Datafile registration complete. ";
    return ok(outcome + countSuccesses + " out of " + countReleased
            + " unregistered, published files registered successfully.");
}

@GET
@AuthRequired
Expand Down

0 comments on commit 4f1afd3

Please sign in to comment.