diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
index 307f842f52b..530c703ef97 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
@@ -12,6 +12,7 @@
 import edu.harvard.iq.dataverse.engine.command.CommandContext;
 import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
 import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
+import edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.FinalizeDatasetPublicationCommand;
 import edu.harvard.iq.dataverse.export.ExportService;
 import edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean;
@@ -19,11 +20,9 @@
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
 import edu.harvard.iq.dataverse.util.SystemConfig;
 import edu.harvard.iq.dataverse.workflows.WorkflowComment;
-import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Date;
@@ -48,9 +47,6 @@
 import javax.persistence.Query;
 import javax.persistence.StoredProcedureQuery;
 import javax.persistence.TypedQuery;
-import javax.xml.stream.XMLOutputFactory;
-import javax.xml.stream.XMLStreamException;
-import javax.xml.stream.XMLStreamWriter;
 
 import org.apache.commons.lang.RandomStringUtils;
 import org.ocpsoft.common.util.Strings;
@@ -928,4 +924,35 @@ public long findStorageSize(Dataset dataset, boolean countCachedExtras) throws I
         return total;
     }
+
+    /**
+     * An optimized method for deleting a harvested dataset.
+     *
+     * @param dataset
+     * @param request DataverseRequest (for initializing the DestroyDatasetCommand)
+     * @param hdLogger logger object (in practice, this will be a separate log file created for a specific harvesting job)
+     */
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+    public void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Logger hdLogger) {
+        // Purge all the SOLR documents associated with this client from the
+        // index server:
+        indexService.deleteHarvestedDocuments(dataset);
+
+        try {
+            // files from harvested datasets are removed unceremoniously,
+            // directly in the database. no need to bother calling the
+            // DeleteFileCommand on them.
+            for (DataFile harvestedFile : dataset.getFiles()) {
+                DataFile merged = em.merge(harvestedFile);
+                em.remove(merged);
+                harvestedFile = null;
+            }
+            dataset.setFiles(null);
+            Dataset merged = em.merge(dataset);
+            commandEngine.submit(new DestroyDatasetCommand(merged, request));
+            hdLogger.info("Successfully destroyed the dataset");
+        } catch (Exception ex) {
+            hdLogger.warning("Failed to destroy the dataset");
+        }
+    }
 
 }
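
The @TransactionAttribute(REQUIRES_NEW) annotation on the new deleteHarvestedDataset() method is what makes this path an optimization: the container opens a dedicated transaction for every delete, so a failure on one harvested dataset is rolled back and logged on its own instead of aborting the surrounding harvesting run. A minimal sketch of a caller illustrating that isolation (the class and collection names below are illustrative, not part of this patch):

    import java.util.List;
    import java.util.logging.Logger;

    import javax.ejb.EJB;
    import javax.ejb.Stateless;

    import edu.harvard.iq.dataverse.Dataset;
    import edu.harvard.iq.dataverse.DatasetServiceBean;
    import edu.harvard.iq.dataverse.engine.command.DataverseRequest;

    @Stateless
    public class HarvestCleanupExample {

        @EJB
        DatasetServiceBean datasetService;

        public void deleteAll(List<Dataset> doomed, DataverseRequest request, Logger hdLogger) {
            for (Dataset ds : doomed) {
                // REQUIRES_NEW: the container suspends this method's transaction
                // and commits (or rolls back) each delete independently
                datasetService.deleteHarvestedDataset(ds, request, hdLogger);
            }
        }
    }
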
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java
index 4ecc62979e2..b75811e6698 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java
@@ -42,6 +42,7 @@
 import java.io.StringReader;
 import java.nio.file.Files;
 import java.util.ArrayList;
+import java.util.Date;
 import java.util.List;
 import java.util.Set;
 import java.util.logging.Formatter;
@@ -197,7 +198,7 @@ public JsonObjectBuilder handleFile(DataverseRequest dataverseRequest, Dataverse
     }
 
     @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
-    public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, String harvestIdentifier, String metadataFormat, File metadataFile, PrintWriter cleanupLog) throws ImportException, IOException {
+    public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, String harvestIdentifier, String metadataFormat, File metadataFile, Date oaiDateStamp, PrintWriter cleanupLog) throws ImportException, IOException {
         if (harvestingClient == null || harvestingClient.getDataverse() == null) {
             throw new ImportException("importHarvestedDataset called with a null harvestingClient, or an invalid harvestingClient.");
         }
@@ -275,6 +276,10 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve
         ds.setOwner(owner);
         ds.getLatestVersion().setDatasetFields(ds.getLatestVersion().initDatasetFields());
 
+        if (ds.getVersions().get(0).getReleaseTime() == null) {
+            ds.getVersions().get(0).setReleaseTime(oaiDateStamp);
+        }
+
         // Check data against required constraints
         List<ConstraintViolation<DatasetField>> violations = ds.getVersions().get(0).validateRequired();
         if (!violations.isEmpty()) {
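
Passing the OAI datestamp into doImportHarvestedDataset() lets the importer backfill a release time for harvested versions whose metadata does not supply one. A standalone sketch of that fallback, assuming the two datestamp granularities the OAI-PMH spec allows (this helper class is illustrative, not Dataverse API):

    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.TimeZone;

    public class OaiDateStampExample {

        // OAI-PMH datestamps are UTC, either day granularity (YYYY-MM-DD)
        // or second granularity (YYYY-MM-DDThh:mm:ssZ)
        static Date parseOaiDatestamp(String raw) throws ParseException {
            String pattern = raw.length() > 10 ? "yyyy-MM-dd'T'HH:mm:ss'Z'" : "yyyy-MM-dd";
            SimpleDateFormat fmt = new SimpleDateFormat(pattern);
            fmt.setTimeZone(TimeZone.getTimeZone("UTC"));
            return fmt.parse(raw);
        }

        // the same null-check the patch adds in doImportHarvestedDataset()
        static Date effectiveReleaseTime(Date fromMetadata, Date oaiDateStamp) {
            return fromMetadata != null ? fromMetadata : oaiDateStamp;
        }

        public static void main(String[] args) throws ParseException {
            Date stamp = parseOaiDatestamp("2019-06-11T14:30:00Z");
            System.out.println(effectiveReleaseTime(null, stamp)); // falls back to the OAI datestamp
        }
    }
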
diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java
index 7f7fb0662a4..84e28b25f5c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java
+++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DestroyDatasetCommand.java
@@ -80,7 +80,9 @@ protected void executeImpl(CommandContext ctxt) throws CommandException {
         }
 
         //also, let's delete the uploaded thumbnails!
-        deleteDatasetLogo(doomed);
+        if (!doomed.isHarvested()) {
+            deleteDatasetLogo(doomed);
+        }
 
         // ASSIGNMENTS
@@ -92,17 +94,20 @@
             ctxt.em().remove(ra);
         }
 
-        GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt);
-        try{
-            if(idServiceBean.alreadyExists(doomed)){
-                idServiceBean.deleteIdentifier(doomed);
-                for (DataFile df : doomed.getFiles()) {
-                    idServiceBean.deleteIdentifier(df);
+        if (!doomed.isHarvested()) {
+            GlobalIdServiceBean idServiceBean = GlobalIdServiceBean.getBean(ctxt);
+            try {
+                if (idServiceBean.alreadyExists(doomed)) {
+                    idServiceBean.deleteIdentifier(doomed);
+                    for (DataFile df : doomed.getFiles()) {
+                        idServiceBean.deleteIdentifier(df);
+                    }
                 }
+            } catch (Exception e) {
+                logger.log(Level.WARNING, "Identifier deletion was not successful:", e.getMessage());
             }
-        } catch (Exception e) {
-            logger.log(Level.WARNING, "Identifier deletion was not successful:", e.getMessage());
-        }
+        }
+
         Dataverse toReIndex = managedDoomed.getOwner();
 
         // dataset
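
Both new guards rest on the same observation: a harvested dataset is a local copy of remote content, so it has no uploaded logo in local storage and no persistent identifier registered by this installation, and calling out to storage or to the PID provider for it was pure overhead. A self-contained sketch of the PID-cleanup pattern (stand-in types; not the real command class):

    import java.util.logging.Level;
    import java.util.logging.Logger;

    public class PidCleanupSketch {

        // stand-in for GlobalIdServiceBean
        interface PidService {
            boolean alreadyExists(Object dvObject) throws Exception;
            void deleteIdentifier(Object dvObject) throws Exception;
        }

        private static final Logger logger = Logger.getLogger(PidCleanupSketch.class.getName());

        static void cleanupPid(PidService pidService, Object dataset, boolean harvested) {
            if (harvested) {
                return; // this installation never registered a PID for a harvested dataset
            }
            try {
                if (pidService.alreadyExists(dataset)) {
                    pidService.deleteIdentifier(dataset);
                }
            } catch (Exception e) {
                // best-effort: the destroy proceeds even if the PID provider fails
                logger.log(Level.WARNING, "Identifier deletion was not successful: " + e.getMessage());
            }
        }
    }
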
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java
index 40058dc734f..9d0ee40d9d3 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java
@@ -27,8 +27,6 @@
 import javax.ejb.EJBException;
 import javax.ejb.Stateless;
 import javax.ejb.Timer;
-import javax.ejb.TransactionAttribute;
-import javax.ejb.TransactionAttributeType;
 import javax.faces.bean.ManagedBean;
 import javax.inject.Named;
 //import javax.xml.bind.Unmarshaller;
@@ -39,14 +37,9 @@
 import org.xml.sax.SAXException;
 
 import com.lyncode.xoai.model.oaipmh.Header;
-import edu.harvard.iq.dataverse.DataFile;
 import edu.harvard.iq.dataverse.EjbDataverseEngine;
 import edu.harvard.iq.dataverse.api.imports.ImportServiceBean;
 import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
-import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
-import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
-import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
-import edu.harvard.iq.dataverse.engine.command.impl.DeleteDatasetCommand;
 import edu.harvard.iq.dataverse.harvest.client.oai.OaiHandler;
 import edu.harvard.iq.dataverse.harvest.client.oai.OaiHandlerException;
 import edu.harvard.iq.dataverse.search.IndexServiceBean;
@@ -263,13 +256,14 @@
                 Header h = idIter.next();
                 String identifier = h.getIdentifier();
+                Date dateStamp = h.getDatestamp();
 
-                hdLogger.info("processing identifier: " + identifier);
+                hdLogger.info("processing identifier: " + identifier + ", date: " + dateStamp);
 
                 MutableBoolean getRecordErrorOccurred = new MutableBoolean(false);
 
                 // Retrieve and process this record with a separate GetRecord call:
-                Long datasetId = processRecord(dataverseRequest, hdLogger, importCleanupLog, oaiHandler, identifier, getRecordErrorOccurred, processedSizeThisBatch, deletedIdentifiers);
+                Long datasetId = processRecord(dataverseRequest, hdLogger, importCleanupLog, oaiHandler, identifier, getRecordErrorOccurred, processedSizeThisBatch, deletedIdentifiers, dateStamp);
 
                 hdLogger.info("Total content processed in this batch so far: "+processedSizeThisBatch);
                 if (datasetId != null) {
@@ -315,8 +309,7 @@ private List<Long> harvestOAI(DataverseRequest dataverseRequest, HarvestingClien
 
-    @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
-    public Long processRecord(DataverseRequest dataverseRequest, Logger hdLogger, PrintWriter importCleanupLog, OaiHandler oaiHandler, String identifier, MutableBoolean recordErrorOccurred, MutableLong processedSizeThisBatch, List<String> deletedIdentifiers) {
+    private Long processRecord(DataverseRequest dataverseRequest, Logger hdLogger, PrintWriter importCleanupLog, OaiHandler oaiHandler, String identifier, MutableBoolean recordErrorOccurred, MutableLong processedSizeThisBatch, List<String> deletedIdentifiers, Date dateStamp) {
         String errMessage = null;
         Dataset harvestedDataset = null;
         logGetRecord(hdLogger, oaiHandler, identifier);
@@ -334,7 +327,7 @@ public Long processRecord(DataverseRequest dataverseRequest, Logger hdLogger, Pr
             Dataset dataset = datasetService.getDatasetByHarvestInfo(oaiHandler.getHarvestingClient().getDataverse(), identifier);
             if (dataset != null) {
                 hdLogger.info("Deleting dataset " + dataset.getGlobalIdString());
-                deleteHarvestedDataset(dataset, dataverseRequest, hdLogger);
+                datasetService.deleteHarvestedDataset(dataset, dataverseRequest, hdLogger);
                 // TODO:
                 // check the status of that Delete - see if it actually succeeded
                 deletedIdentifiers.add(identifier);
@@ -351,7 +344,8 @@
                         oaiHandler.getHarvestingClient(),
                         identifier,
                         oaiHandler.getMetadataPrefix(),
-                        record.getMetadataFile(),
+                        record.getMetadataFile(),
+                        dateStamp,
                         importCleanupLog);
 
                 hdLogger.fine("Harvest Successful for identifier " + identifier);
@@ -388,36 +382,7 @@ public Long processRecord(DataverseRequest dataverseRequest, Logger hdLogger, Pr
         return harvestedDataset != null ? harvestedDataset.getId() : null;
     }
-
-    private void deleteHarvestedDataset(Dataset dataset, DataverseRequest request, Logger hdLogger) {
-        // Purge all the SOLR documents associated with this client from the
-        // index server:
-        indexService.deleteHarvestedDocuments(dataset);
-
-        try {
-            // files from harvested datasets are removed unceremoniously,
-            // directly in the database. no need to bother calling the
-            // DeleteFileCommand on them.
-            for (DataFile harvestedFile : dataset.getFiles()) {
-                DataFile merged = em.merge(harvestedFile);
-                em.remove(merged);
-                harvestedFile = null;
-            }
-            dataset.setFiles(null);
-            Dataset merged = em.merge(dataset);
-            engineService.submit(new DeleteDatasetCommand(request, merged));
-        } catch (IllegalCommandException ex) {
-            // TODO: log the result
-        } catch (PermissionException ex) {
-            // TODO: log the result
-        } catch (CommandException ex) {
-            // TODO: log the result
-        }
-
-        // TODO: log the success result
-    }
-
-
+
     private void logBeginOaiHarvest(Logger hdLogger, HarvestingClient harvestingClient) {
         hdLogger.log(Level.INFO, "BEGIN HARVEST, oaiUrl=" +harvestingClient.getHarvestingUrl()
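
With deletion moved into DatasetServiceBean and doImportHarvestedDataset() already running under REQUIRES_NEW, every harvested record is now processed in its own container-managed transaction; that is why processRecord() can drop its @TransactionAttribute(NOT_SUPPORTED) annotation and become private. A condensed, illustrative view of the per-record flow (GetRecord retrieval and error handling elided; the deletedRecord flag stands in for the deleted-status check on the retrieved record):

    import java.io.File;
    import java.io.PrintWriter;
    import java.util.Date;
    import java.util.logging.Logger;

    import javax.ejb.EJB;
    import javax.ejb.Stateless;

    import com.lyncode.xoai.model.oaipmh.Header;

    import edu.harvard.iq.dataverse.Dataset;
    import edu.harvard.iq.dataverse.DatasetServiceBean;
    import edu.harvard.iq.dataverse.api.imports.ImportServiceBean;
    import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
    import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;

    @Stateless
    public class ProcessRecordSketch {

        @EJB DatasetServiceBean datasetService;
        @EJB ImportServiceBean importService;

        public void processOne(Header h, boolean deletedRecord, Dataset existing,
                HarvestingClient client, String metadataPrefix, File metadataFile,
                DataverseRequest request, Logger hdLogger, PrintWriter cleanupLog)
                throws Exception {
            String identifier = h.getIdentifier();
            Date dateStamp = h.getDatestamp(); // new in this patch
            if (deletedRecord) {
                // deletion is now delegated to the service bean (REQUIRES_NEW):
                datasetService.deleteHarvestedDataset(existing, request, hdLogger);
            } else {
                // the OAI datestamp rides along into the import (also REQUIRES_NEW):
                importService.doImportHarvestedDataset(request, client, identifier,
                        metadataPrefix, metadataFile, dateStamp, cleanupLog);
            }
        }
    }
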