Skip to content

Commit

Permalink
recursive indexing through list of ids, more logging #4406
Browse files Browse the repository at this point in the history
  • Loading branch information
ferrys committed Apr 18, 2018
1 parent 794b939 commit ac883a8
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 44 deletions.
22 changes: 22 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,28 @@ private List<Dataset> findByOwnerId(Long ownerId, boolean onlyPublished) {
return retList;
}
}

/**
 * Returns the ids of the datasets owned by the given dataverse, without
 * restricting the result to published datasets.
 *
 * @param ownerId id of the owning dataverse
 * @return the ids produced by the two-argument overload when
 *         {@code onlyPublished} is {@code false}
 */
public List<Long> findIdsByOwnerId(Long ownerId) {
    final boolean onlyPublished = false;
    return findIdsByOwnerId(ownerId, onlyPublished);
}

/**
 * Finds the ids of the datasets whose owner has the given id, ordered by
 * dataset id.
 *
 * @param ownerId       id of the owning dataverse
 * @param onlyPublished when {@code true}, keep only datasets that are
 *                      released and not deaccessioned; when {@code false},
 *                      return every matching id straight from the query
 * @return the matching dataset ids
 */
private List<Long> findIdsByOwnerId(Long ownerId, boolean onlyPublished) {
    if (onlyPublished) {
        // The release/deaccession checks are made on the entity, so the
        // full datasets are loaded and filtered in memory.
        List<Dataset> owned = em.createQuery("select object(o) from Dataset as o where o.owner.id =:ownerId order by o.id", Dataset.class)
                .setParameter("ownerId", ownerId)
                .getResultList();
        List<Long> publishedIds = new ArrayList<>();
        for (Dataset candidate : owned) {
            if (!candidate.isReleased() || candidate.isDeaccessioned()) {
                continue;
            }
            publishedIds.add(candidate.getId());
        }
        return publishedIds;
    }

    // No filtering requested: select only the id column.
    return em.createQuery("select o.id from Dataset as o where o.owner.id =:ownerId order by o.id", Long.class)
            .setParameter("ownerId", ownerId)
            .getResultList();
}

public List<Dataset> findAll() {
return em.createQuery("select object(o) from Dataset as o order by o.id", Dataset.class).getResultList();
Expand Down
29 changes: 17 additions & 12 deletions src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ public List<Dataverse> findByOwnerId(Long ownerId) {
return em.createQuery(qr, Dataverse.class).setParameter("ownerId", ownerId).getResultList();
}

/**
 * Finds the ids of the dataverses whose owner has the given id, ordered by
 * dataverse id.
 *
 * @param ownerId id of the owning dataverse
 * @return ids of the dataverses returned by the query
 */
public List<Long> findIdsByOwnerId(Long ownerId) {
    return em.createQuery(
                    "select o.id from Dataverse as o where o.owner.id =:ownerId order by o.id",
                    Long.class)
            .setParameter("ownerId", ownerId)
            .getResultList();
}

public List<Dataverse> findPublishedByOwnerId(Long ownerId) {
String qr ="select object(o) from Dataverse as o where o.owner.id =:ownerId and o.publicationDate is not null order by o.name";
return em.createQuery(qr, Dataverse.class).setParameter("ownerId", ownerId).getResultList();
Expand Down Expand Up @@ -559,37 +564,37 @@ public void populateDvSearchCard(SolrSearchResult solrSearchResult) {
}
}

// function to recursively find all children of a dataverse that
// function to recursively find ids of all children of a dataverse that
// are also of type dataverse
public List<Dataverse> findAllDataverseDataverseChildren(Dataverse dv) {
public List<Long> findAllDataverseDataverseChildren(Long dvId) {
// get list of Dataverse children
List<Dataverse> dataverseChildren = findByOwnerId(dv.getId());
List<Long> dataverseChildren = findIdsByOwnerId(dvId);

if (dataverseChildren == null) {
return dataverseChildren;
} else {
List<Dataverse> newChildren = new ArrayList<>();
for (Dataverse childDv : dataverseChildren) {
newChildren.addAll(findAllDataverseDataverseChildren(childDv));
List<Long> newChildren = new ArrayList<>();
for (Long childDvId : dataverseChildren) {
newChildren.addAll(findAllDataverseDataverseChildren(childDvId));
}
dataverseChildren.addAll(newChildren);
return dataverseChildren;
}
}

// function to recursively find all children of a dataverse that are
// function to recursively find ids of all children of a dataverse that are
// of type dataset
public List<Dataset> findAllDataverseDatasetChildren(Dataverse dv) {
public List<Long> findAllDataverseDatasetChildren(Long dvId) {
// get list of Dataverse children
List<Dataverse> dataverseChildren = findByOwnerId(dv.getId());
List<Long> dataverseChildren = findIdsByOwnerId(dvId);
// get list of Dataset children
List<Dataset> datasetChildren = datasetService.findByOwnerId(dv.getId());
List<Long> datasetChildren = datasetService.findIdsByOwnerId(dvId);

if (dataverseChildren == null) {
return datasetChildren;
} else {
for (Dataverse childDv : dataverseChildren) {
datasetChildren.addAll(findAllDataverseDatasetChildren(childDv));
for (Long childDvId : dataverseChildren) {
datasetChildren.addAll(findAllDataverseDatasetChildren(childDvId));
}
return datasetChildren;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,15 @@ public void executeImpl(CommandContext ctxt) throws CommandException {
}

logger.info("Getting dataset children of dataverse...");
List<Dataset> datasetChildren = ctxt.dataverses().findAllDataverseDatasetChildren(moved);
List<Dataset> datasetChildren = new ArrayList<>();
List<Long> datasetChildrenIds = ctxt.dataverses().findAllDataverseDatasetChildren(moved.getId());
datasetChildrenIds.forEach( (dsId) -> datasetChildren.add(ctxt.datasets().find(dsId)) );

logger.info("Getting dataverse children of dataverse...");
List<Dataverse> dataverseChildren = ctxt.dataverses().findAllDataverseDataverseChildren(moved);
List<Dataverse> dataverseChildren = new ArrayList<>();
List<Long> dataverseChildrenIds = ctxt.dataverses().findAllDataverseDataverseChildren(moved.getId());
dataverseChildrenIds.forEach( (dvId) -> dataverseChildren.add(ctxt.dataverses().find(dvId)) );

dataverseChildren.add(moved); // include the root of the children


Expand All @@ -100,8 +105,7 @@ public void executeImpl(CommandContext ctxt) throws CommandException {
ownersToCheck.addAll(destination.getOwners());
}

// if all the dataverse's datasets GUESTBOOKS are not contained in the new dataverse then remove the
// ones that aren't
// generate list of destination guestbooks to check against
List<Guestbook> destinationGbs = null;
if (moved.getGuestbooks() != null) {
List<Guestbook> movedGbs = moved.getGuestbooks();
Expand Down Expand Up @@ -130,7 +134,7 @@ public void executeImpl(CommandContext ctxt) throws CommandException {
}
}

// if the dataverses default TEMPLATE is not contained in the new dataverse then remove it
// generate a list of templates in destination to check against
List<Template> destinationTemplates = null;
if (moved.getTemplates() != null) {
List<Template> movedTemplates = moved.getTemplates();
Expand All @@ -144,10 +148,7 @@ public void executeImpl(CommandContext ctxt) throws CommandException {
destinationTemplates.addAll(movedTemplates);
}

// if all the dataverses METADATA BLOCKS are not contained in the new dataverse then remove the
// ones that aren't available in the destination
// i.e. the case where a custom metadata block is available through a parent
// but then the dataverse is moved outside of that parent-child structure
// generate a list of metadatablocks in destination to check against
Boolean inheritMbValue = null;
List<Dataverse> mbParentsToCheck = new ArrayList<>();
mbParentsToCheck.addAll(ownersToCheck);
Expand All @@ -160,6 +161,8 @@ public void executeImpl(CommandContext ctxt) throws CommandException {

logger.info("Checking templates and metadata blocks");
for (Dataverse dv : dataverseChildren) {
// if the dataverse's default TEMPLATE is not contained in the
// destination dataverse, remove it
if (destinationTemplates != null) {
Template dvt = dv.getDefaultTemplate();
if (dvt != null && !destinationTemplates.contains(dvt)) {
Expand All @@ -171,8 +174,10 @@ public void executeImpl(CommandContext ctxt) throws CommandException {
}
}

// determine which metadata blocks to keep selected
// on the moved dataverse and its children
// if all the dataverses METADATA BLOCKS are not contained in the new dataverse then remove the
// ones that aren't available in the destination
// i.e. the case where a custom metadata block is available through a parent
// but then the dataverse is moved outside of that parent-child structure
if (inheritMbValue != null) {
List<MetadataBlock> metadataBlocksToKeep = new ArrayList<>();
List<MetadataBlock> movedMbs = dv.getMetadataBlocks(true);
Expand All @@ -198,6 +203,7 @@ public void executeImpl(CommandContext ctxt) throws CommandException {
}
}

// get list of dataverses each child links to
if (dv.getDataverseLinkingDataverses() != null) {
linkingDataverses.addAll(dv.getDataverseLinkingDataverses());
}
Expand All @@ -206,6 +212,8 @@ public void executeImpl(CommandContext ctxt) throws CommandException {
List<DatasetLinkingDataverse> linkingDatasets = new ArrayList();
logger.info("Checking guestbooks...");
for (Dataset ds : datasetChildren) {
// if a dataset's GUESTBOOK is not contained in the
// destination dataverse, then remove it
Guestbook dsgb = ds.getGuestbook();
if (dsgb != null && (destinationGbs == null || !destinationGbs.contains(dsgb))) {
if (force == null || !force) {
Expand All @@ -214,11 +222,15 @@ public void executeImpl(CommandContext ctxt) throws CommandException {
}
ds.setGuestbook(null);
}

// get list of dataverses each child dataset links to
if (ds.getDatasetLinkingDataverses() != null) {
linkingDatasets.addAll(ds.getDatasetLinkingDataverses());
}
}

// if a dataverse links to its destination dataverse or any of
// its destination's owners, remove the link
for (DataverseLinkingDataverse dvld : linkingDataverses) {
logger.info("Checking linked dataverses....");
for (Dataverse owner : ownersToCheck){
Expand All @@ -233,6 +245,8 @@ public void executeImpl(CommandContext ctxt) throws CommandException {
}
}

// if a dataset links to its destination dataverse or any of
// its destination's owners, remove the link
for (DatasetLinkingDataverse dsld : linkingDatasets) {
logger.info("Checking linked datasets...");
for (Dataverse owner : ownersToCheck){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,28 +164,52 @@ public Future<String> indexAllOrSubset(long numPartitions, long partitionId, boo
logger.info(status);
return new AsyncResult<>(status);
}

@Asynchronous
public void indexDataverseRecursively(Dataverse dataverse) {
long start = System.currentTimeMillis();
// index the Dataverse of current recursion
indexService.indexDataverseInNewTransaction(dataverse);
int datasetIndexCount = 0, datasetFailureCount = 0, dataverseIndexCount = 0, dataverseFailureCount = 0;
// get list of Dataverse children
List<Dataverse> dataverseChildren = dataverseService.findByOwnerId(dataverse.getId());

List<Long> dataverseChildren = dataverseService.findAllDataverseDataverseChildren(dataverse.getId());
dataverseChildren.add(dataverse.getId());

// get list of Dataset children
List<Dataset> datasetChildren = datasetService.findByOwnerId(dataverse.getId());
List<Long> datasetChildren = dataverseService.findAllDataverseDatasetChildren(dataverse.getId());

// index the Dataset children
for (Dataset child : datasetChildren) {
indexService.indexDatasetInNewTransaction(child.getId());
logger.info("Starting index on " + dataverseChildren.size() + " dataverses and " + datasetChildren.size() + " datasets.");

// index the Dataverse children
for (Long childId : dataverseChildren) {
try {
dataverseIndexCount++;
Dataverse dv = dataverseService.find(childId);
logger.info("indexing dataverse " + dataverseIndexCount + " of " + dataverseChildren.size() + " (id=" + childId + ", persistentId=" + dv.getAlias() + ")");
Future<String> result = indexService.indexDataverseInNewTransaction(dv);
dv = null;
} catch (Exception e) {
//We want to keep running even after an exception so throw some more info into the log
dataverseFailureCount++;
logger.info("FAILURE indexing dataverse " + dataverseIndexCount + " of " + dataverseChildren.size() + " (id=" + childId + ") Exception info: " + e.getMessage());
}
}
// recursively index the Dataverse children
for (Dataverse child : dataverseChildren) {
indexDataverseRecursively(child);

// index the Dataset children
for (Long childId : datasetChildren) {
try {
datasetIndexCount++;
logger.info("indexing dataset " + datasetIndexCount + " of " + datasetChildren.size() + " (id=" + childId + ")");
indexService.indexDatasetInNewTransaction(childId);
} catch (Exception e) {
//We want to keep running even after an exception so throw some more info into the log
datasetFailureCount++;
logger.info("FAILURE indexing dataset " + datasetIndexCount + " of " + datasetChildren.size() + " (id=" + childId + ") Exception info: " + e.getMessage());
}
}
long end = System.currentTimeMillis();
logger.info("Time to index so far: " + (end - start));
if (datasetFailureCount + dataverseFailureCount > 0){
logger.info("There were index failures. " + dataverseFailureCount + " dataverse(s) and " + datasetFailureCount + " dataset(s) failed to index. Please check the log for more information.");
}
logger.info(dataverseIndexCount + " dataverses and " + datasetIndexCount + " datasets indexed. Total time to index " + (end - start) + ".");
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -176,25 +176,35 @@ public Dataverse save(Dataverse dataverse) {
// no-op. The superclass accesses databases which we don't have.
return dataverse;
}
@Override
public Dataverse find(Object pk) {
    // Fake lookup: only the Long key 10 resolves to a known fixture;
    // any other key (including null or a non-Long) gets a blank Dataverse.
    final Long grandchildKey = 10L;
    if (grandchildKey.equals(pk)) {
        return grandchildEE;
    }
    return new Dataverse();
}
@Override
public List<Dataverse> findByOwnerId(Long ownerId) {
    // Stub: every owner is reported as having no child dataverses.
    List<Dataverse> noChildren = new ArrayList<>();
    return noChildren;
}
@Override
public List<Dataverse> findAllDataverseDataverseChildren(Dataverse dv) {
public List<Long> findAllDataverseDataverseChildren(Long dvId) {
// fake this for what we need
List<Dataverse> fakeChildren = new ArrayList<>();
if (dv.getId() == 9){
fakeChildren.add(grandchildEE);
List<Long> fakeChildren = new ArrayList<>();
if (dvId == 9){
fakeChildren.add(grandchildEE.getId());
}
return fakeChildren;
}
@Override
public List<Dataset> findAllDataverseDatasetChildren(Dataverse dv) {
public List<Long> findAllDataverseDatasetChildren(Long dvId) {
// fake this for what we need
List<Dataset> fakeChildren = new ArrayList<>();
if (dv.getId() == 6) {
fakeChildren.add(datasetCC);
List<Long> fakeChildren = new ArrayList<>();
if (dvId == 6) {
fakeChildren.add(datasetCC.getId());
}
return fakeChildren;
}
Expand Down Expand Up @@ -241,6 +251,16 @@ public DatasetServiceBean datasets() {
public List<Dataset> findByOwnerId(Long ownerId) {
    // Stub: every owner is reported as having no datasets.
    List<Dataset> noDatasets = new ArrayList<>();
    return noDatasets;
}
@Override
public Dataset find(Object pk) {
    // Fake lookup: only the Long key 2 resolves to a known fixture;
    // any other key (including null or a non-Long) gets a blank Dataset.
    final Long datasetKey = 2L;
    if (datasetKey.equals(pk)) {
        return datasetCC;
    }
    return new Dataset();
}
};
}
@Override
Expand Down

0 comments on commit ac883a8

Please sign in to comment.