From 7bf55e99992935239d617d66b55864f0041c3b2f Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 9 Mar 2022 23:43:20 +0000 Subject: [PATCH] Add lucene.searchBlockSize and refactor getReadable #277 --- src/main/config/run.properties.example | 4 + .../core/entity/EntityBaseBean.java | 3 +- .../core/manager/EntityBeanManager.java | 62 ++++---- .../icatproject/core/manager/GateKeeper.java | 147 ++++++++---------- .../icatproject/core/manager/HasEntityId.java | 8 + .../core/manager/PropertyHandler.java | 10 +- .../core/manager/ScoredEntityBaseBean.java | 12 +- .../org/icatproject/exposed/ICATRest.java | 2 +- src/main/resources/run.properties | 4 + .../icatproject/core/manager/TestLucene.java | 2 +- src/test/scripts/prepare_test.py | 1 + 11 files changed, 137 insertions(+), 118 deletions(-) create mode 100644 src/main/java/org/icatproject/core/manager/HasEntityId.java diff --git a/src/main/config/run.properties.example b/src/main/config/run.properties.example index 06e73271..d7f12592 100644 --- a/src/main/config/run.properties.example +++ b/src/main/config/run.properties.example @@ -45,6 +45,10 @@ log.list = SESSION WRITE READ INFO # Lucene lucene.url = https://localhost:8181 lucene.populateBlockSize = 10000 +# Recommend setting lucene.searchBlockSize equal to maxIdsInQuery, so that all Lucene results can be authorised at once +# If lucene.searchBlockSize > maxIdsInQuery, then multiple auth checks may be needed for a single search to Lucene +# The optimal value depends on how likely a user's auth request fails: larger values are more efficient when rejection is more likely +lucene.searchBlockSize = 1000 lucene.directory = ${HOME}/data/icat/lucene lucene.backlogHandlerIntervalSeconds = 60 lucene.enqueuedRequestIntervalSeconds = 5 diff --git a/src/main/java/org/icatproject/core/entity/EntityBaseBean.java b/src/main/java/org/icatproject/core/entity/EntityBaseBean.java index afcc54e0..0b94d87d 100644 --- a/src/main/java/org/icatproject/core/entity/EntityBaseBean.java +++ b/src/main/java/org/icatproject/core/entity/EntityBaseBean.java @@ -31,6 +31,7 @@ import org.icatproject.core.manager.EntityInfoHandler; import org.icatproject.core.manager.EntityInfoHandler.Relationship; import org.icatproject.core.manager.GateKeeper; +import org.icatproject.core.manager.HasEntityId; import org.icatproject.core.manager.LuceneManager; import org.icatproject.core.parser.IncludeClause.Step; import org.slf4j.Logger; @@ -38,7 +39,7 @@ @SuppressWarnings("serial") @MappedSuperclass -public abstract class EntityBaseBean implements Serializable { +public abstract class EntityBaseBean implements HasEntityId, Serializable { private static final EntityInfoHandler eiHandler = EntityInfoHandler.getInstance(); diff --git a/src/main/java/org/icatproject/core/manager/EntityBeanManager.java b/src/main/java/org/icatproject/core/manager/EntityBeanManager.java index b46a2407..44cbc831 100644 --- a/src/main/java/org/icatproject/core/manager/EntityBeanManager.java +++ b/src/main/java/org/icatproject/core/manager/EntityBeanManager.java @@ -148,7 +148,7 @@ public enum PersistMode { private boolean luceneActive; private int maxEntities; - private int maxIdsInQuery; + private int luceneSearchBlockSize; private long exportCacheSize; private Set rootUserNames; @@ -782,28 +782,38 @@ private void exportTable(String beanName, Set ids, OutputStream output, } } - private void filterReadAccess(List results, List allResults, + private void filterReadAccess(List acceptedResults, List newResults, int maxCount, String userId, EntityManager manager, Class klass) throws IcatException { - logger.debug("Got " + allResults.size() + " results from Lucene"); - List allIds = new ArrayList<>(); - allResults.forEach(r -> allIds.add(r.getEntityBaseBeanId())); - List allowedIds = gateKeeper.getReadableIds(userId, allIds, klass, manager); - for (ScoredEntityBaseBean sr : allResults) { - try { - if (allowedIds.contains(sr.getEntityBaseBeanId())) { - results.add(sr); - } - if (results.size() > maxEntities) { - throw new IcatException(IcatExceptionType.VALIDATION, - "attempt to return more than " + maxEntities + " entities"); - } - if (results.size() == maxCount) { - break; + logger.debug("Got " + newResults.size() + " results from Lucene"); + Set allowedIds = gateKeeper.getReadableIds(userId, newResults, klass.getSimpleName(), manager); + if (allowedIds == null) { + // A null result means there are no restrictions on the readable ids, so add as + // many newResults as we need to reach maxCount + int needed = maxCount - acceptedResults.size(); + if (newResults.size() > needed) { + acceptedResults.addAll(newResults.subList(0, needed)); + } else { + acceptedResults.addAll(newResults); + } + if (acceptedResults.size() > maxEntities) { + throw new IcatException(IcatExceptionType.VALIDATION, + "attempt to return more than " + maxEntities + " entities"); + } + } else { + // Otherwise, add results in order until we reach maxCount + for (ScoredEntityBaseBean newResult : newResults) { + if (allowedIds.contains(newResult.getId())) { + acceptedResults.add(newResult); + if (acceptedResults.size() > maxEntities) { + throw new IcatException(IcatExceptionType.VALIDATION, + "attempt to return more than " + maxEntities + " entities"); + } + if (acceptedResults.size() == maxCount) { + break; + } } - } catch (IcatException e) { - // Nothing to do } } } @@ -1155,7 +1165,7 @@ void init() { notificationRequests = propertyHandler.getNotificationRequests(); luceneActive = lucene.isActive(); maxEntities = propertyHandler.getMaxEntities(); - maxIdsInQuery = propertyHandler.getMaxIdsInQuery(); + luceneSearchBlockSize = propertyHandler.getLuceneSearchBlockSize(); exportCacheSize = propertyHandler.getImportCacheSize(); rootUserNames = propertyHandler.getRootUserNames(); key = propertyHandler.getKey(); @@ -1400,7 +1410,7 @@ public List luceneDatafiles(String userName, String user, LuceneSearchResult last = null; Long uid = null; List allResults = Collections.emptyList(); - int blockSize = maxIdsInQuery; + int blockSize = luceneSearchBlockSize; do { if (last == null) { @@ -1421,7 +1431,7 @@ public List luceneDatafiles(String userName, String user, try (JsonGenerator gen = Json.createGenerator(baos).writeStartObject()) { gen.write("userName", userName); if (results.size() > 0) { - gen.write("entityId", results.get(0).getEntityBaseBeanId()); + gen.write("entityId", results.get(0).getId()); } gen.writeEnd(); } @@ -1439,7 +1449,7 @@ public List luceneDatasets(String userName, String user, S LuceneSearchResult last = null; Long uid = null; List allResults = Collections.emptyList(); - int blockSize = maxIdsInQuery; + int blockSize = luceneSearchBlockSize; do { if (last == null) { @@ -1459,7 +1469,7 @@ public List luceneDatasets(String userName, String user, S try (JsonGenerator gen = Json.createGenerator(baos).writeStartObject()) { gen.write("userName", userName); if (results.size() > 0) { - gen.write("entityId", results.get(0).getEntityBaseBeanId()); + gen.write("entityId", results.get(0).getId()); } gen.writeEnd(); } @@ -1486,7 +1496,7 @@ public List luceneInvestigations(String userName, String u LuceneSearchResult last = null; Long uid = null; List allResults = Collections.emptyList(); - int blockSize = maxIdsInQuery; + int blockSize = luceneSearchBlockSize; do { if (last == null) { @@ -1506,7 +1516,7 @@ public List luceneInvestigations(String userName, String u try (JsonGenerator gen = Json.createGenerator(baos).writeStartObject()) { gen.write("userName", userName); if (results.size() > 0) { - gen.write("entityId", results.get(0).getEntityBaseBeanId()); + gen.write("entityId", results.get(0).getId()); } gen.writeEnd(); } diff --git a/src/main/java/org/icatproject/core/manager/GateKeeper.java b/src/main/java/org/icatproject/core/manager/GateKeeper.java index f4cfe2eb..ae12fe5c 100644 --- a/src/main/java/org/icatproject/core/manager/GateKeeper.java +++ b/src/main/java/org/icatproject/core/manager/GateKeeper.java @@ -164,19 +164,17 @@ public Set getPublicTables() { return publicTables; } - public List getReadable(String userId, List beans, EntityManager manager) { - - if (beans.size() == 0) { - return beans; - } - - EntityBaseBean object = beans.get(0); - - Class objectClass = object.getClass(); - String simpleName = objectClass.getSimpleName(); + /** + * @param userId The user making the READ request + * @param simpleName The name of the requested entity type + * @param manager + * @return Returns a list of restrictions that apply to the requested entity + * type. If there are no restrictions, then returns null. + */ + private List getRestrictions(String userId, String simpleName, EntityManager manager) { if (rootUserNames.contains(userId)) { logger.info("\"Root\" user " + userId + " is allowed READ to " + simpleName); - return beans; + return null; } TypedQuery query = manager.createNamedQuery(Rule.INCLUDE_QUERY, String.class) @@ -184,93 +182,84 @@ public List getReadable(String userId, List bean List restrictions = query.getResultList(); logger.debug("Got " + restrictions.size() + " authz queries for READ by " + userId + " to a " - + objectClass.getSimpleName()); + + simpleName); for (String restriction : restrictions) { logger.debug("Query: " + restriction); if (restriction == null) { logger.info("Null restriction => READ permitted to " + simpleName); - return beans; + return null; } } - /* - * IDs are processed in batches to avoid Oracle error: ORA-01795: - * maximum number of expressions in a list is 1000 - */ + return restrictions; + } - List idLists = new ArrayList<>(); - StringBuilder sb = null; + /** + * Returns a sub list of the passed entities that the user has READ access to. + * + * @param userId The user making the READ request + * @param beans The entities the user wants to READ + * @param manager + * @return A list of entities the user has read access to + */ + public List getReadable(String userId, List beans, EntityManager manager) { - int i = 0; - for (EntityBaseBean bean : beans) { - if (i == 0) { - sb = new StringBuilder(); - sb.append(bean.getId()); - i = 1; - } else { - sb.append("," + bean.getId()); - i++; - } - if (i == maxIdsInQuery) { - i = 0; - idLists.add(sb.toString()); - sb = null; - } - } - if (sb != null) { - idLists.add(sb.toString()); + if (beans.size() == 0) { + return beans; } + EntityBaseBean object = beans.get(0); + Class objectClass = object.getClass(); + String simpleName = objectClass.getSimpleName(); - logger.debug("Check readability of " + beans.size() + " beans has been divided into " + idLists.size() - + " queries."); - - Set ids = new HashSet<>(); - for (String idList : idLists) { - for (String qString : restrictions) { - TypedQuery q = manager.createQuery(qString.replace(":pkids", idList), Long.class); - if (qString.contains(":user")) { - q.setParameter("user", userId); - } - ids.addAll(q.getResultList()); - } + List restrictions = getRestrictions(userId, simpleName, manager); + if (restrictions == null) { + return beans; } + Set readableIds = getReadableIds(userId, beans, restrictions, manager); + List results = new ArrayList<>(); for (EntityBaseBean bean : beans) { - if (ids.contains(bean.getId())) { + if (readableIds.contains(bean.getId())) { results.add(bean); } } return results; } - public List getReadableIds(String userId, List ids, - Class objectClass, EntityManager manager) { - if (ids.size() == 0) { - return ids; - } + /** + * @param userId The user making the READ request + * @param entities The entities to check + * @param simpleName The name of the requested entity type + * @param manager + * @return Set of the ids that the user has read access to. If there are no + * restrictions, then returns null. + */ + public Set getReadableIds(String userId, List entities, String simpleName, + EntityManager manager) { - String simpleName = objectClass.getSimpleName(); - if (rootUserNames.contains(userId)) { - logger.info("\"Root\" user " + userId + " is allowed READ to " + simpleName); - return ids; + if (entities.size() == 0) { + return null; } - TypedQuery query = manager.createNamedQuery(Rule.INCLUDE_QUERY, String.class) - .setParameter("member", userId).setParameter("bean", simpleName); + List restrictions = getRestrictions(userId, simpleName, manager); + if (restrictions == null) { + return null; + } - List restrictions = query.getResultList(); - logger.debug("Got " + restrictions.size() + " authz queries for READ by " + userId + " to a " - + objectClass.getSimpleName()); + return getReadableIds(userId, entities, restrictions, manager); + } - for (String restriction : restrictions) { - logger.debug("Query: " + restriction); - if (restriction == null) { - logger.info("Null restriction => READ permitted to " + simpleName); - return ids; - } - } + /** + * @param userId The user making the READ request + * @param entities The entities to check + * @param restrictions The restrictions applying to the entities + * @param manager + * @return Set of the ids that the user has read access to + */ + private Set getReadableIds(String userId, List entities, List restrictions, + EntityManager manager) { /* * IDs are processed in batches to avoid Oracle error: ORA-01795: @@ -281,13 +270,13 @@ public List getReadableIds(String userId, List ids, StringBuilder sb = null; int i = 0; - for (Long id : ids) { + for (HasEntityId entity : entities) { if (i == 0) { sb = new StringBuilder(); - sb.append(id); + sb.append(entity.getId()); i = 1; } else { - sb.append("," + id); + sb.append("," + entity.getId()); i++; } if (i == maxIdsInQuery) { @@ -300,7 +289,7 @@ public List getReadableIds(String userId, List ids, idLists.add(sb.toString()); } - logger.debug("Check readability of " + ids.size() + " beans has been divided into " + idLists.size() + logger.debug("Check readability of " + entities.size() + " beans has been divided into " + idLists.size() + " queries."); Set readableIds = new HashSet<>(); @@ -314,13 +303,7 @@ public List getReadableIds(String userId, List ids, } } - List results = new ArrayList<>(); - for (Long id : ids) { - if (readableIds.contains(id)) { - results.add(id); - } - } - return results; + return readableIds; } public Set getRootUserNames() { diff --git a/src/main/java/org/icatproject/core/manager/HasEntityId.java b/src/main/java/org/icatproject/core/manager/HasEntityId.java new file mode 100644 index 00000000..8ad36eb8 --- /dev/null +++ b/src/main/java/org/icatproject/core/manager/HasEntityId.java @@ -0,0 +1,8 @@ +package org.icatproject.core.manager; + +/** + * Interface for objects representing entities that hold the entity id. + */ +public interface HasEntityId { + public Long getId(); +} diff --git a/src/main/java/org/icatproject/core/manager/PropertyHandler.java b/src/main/java/org/icatproject/core/manager/PropertyHandler.java index f316bb64..7c1e45a9 100644 --- a/src/main/java/org/icatproject/core/manager/PropertyHandler.java +++ b/src/main/java/org/icatproject/core/manager/PropertyHandler.java @@ -302,6 +302,7 @@ public int getLifetimeMinutes() { private String digestKey; private URL luceneUrl; private int lucenePopulateBlockSize; + private int luceneSearchBlockSize; private Path luceneDirectory; private long luceneBacklogHandlerIntervalMillis; private Map cluster = new HashMap<>(); @@ -460,9 +461,12 @@ private void init() { luceneUrl = props.getURL("lucene.url"); formattedProps.add("lucene.url" + " " + luceneUrl); - lucenePopulateBlockSize = props.getPositiveInt("lucene.populateBlockSize"); + lucenePopulateBlockSize = props.getPositiveInt("lucene.searchBlockSize"); formattedProps.add("lucene.populateBlockSize" + " " + lucenePopulateBlockSize); + luceneSearchBlockSize = props.getPositiveInt("lucene.searchBlockSize"); + formattedProps.add("lucene.searchBlockSize" + " " + luceneSearchBlockSize); + luceneDirectory = props.getPath("lucene.directory"); if (!luceneDirectory.toFile().isDirectory()) { String msg = luceneDirectory + " is not a directory"; @@ -612,6 +616,10 @@ public int getLucenePopulateBlockSize() { return lucenePopulateBlockSize; } + public int getLuceneSearchBlockSize() { + return luceneSearchBlockSize; + } + public long getLuceneBacklogHandlerIntervalMillis() { return luceneBacklogHandlerIntervalMillis; } diff --git a/src/main/java/org/icatproject/core/manager/ScoredEntityBaseBean.java b/src/main/java/org/icatproject/core/manager/ScoredEntityBaseBean.java index 34c58306..6643e7ef 100644 --- a/src/main/java/org/icatproject/core/manager/ScoredEntityBaseBean.java +++ b/src/main/java/org/icatproject/core/manager/ScoredEntityBaseBean.java @@ -1,17 +1,17 @@ package org.icatproject.core.manager; -public class ScoredEntityBaseBean { +public class ScoredEntityBaseBean implements HasEntityId { - private long entityBaseBeanId; + private Long id; private float score; - public ScoredEntityBaseBean(long id, float score) { - this.entityBaseBeanId = id; + public ScoredEntityBaseBean(Long id, float score) { + this.id = id; this.score = score; } - public long getEntityBaseBeanId() { - return entityBaseBeanId; + public Long getId() { + return id; } public float getScore() { diff --git a/src/main/java/org/icatproject/exposed/ICATRest.java b/src/main/java/org/icatproject/exposed/ICATRest.java index bd1a2b4a..0871ef99 100644 --- a/src/main/java/org/icatproject/exposed/ICATRest.java +++ b/src/main/java/org/icatproject/exposed/ICATRest.java @@ -1108,7 +1108,7 @@ public String lucene(@Context HttpServletRequest request, @QueryParam("sessionId gen.writeStartArray(); for (ScoredEntityBaseBean sb : objects) { gen.writeStartObject(); - gen.write("id", sb.getEntityBaseBeanId()); + gen.write("id", sb.getId()); gen.write("score", sb.getScore()); gen.writeEnd(); } diff --git a/src/main/resources/run.properties b/src/main/resources/run.properties index 006bf2c9..2be4fe77 100644 --- a/src/main/resources/run.properties +++ b/src/main/resources/run.properties @@ -18,6 +18,10 @@ log.list = SESSION WRITE READ INFO lucene.url = https://localhost.localdomain:8181 lucene.populateBlockSize = 10000 +# Recommend setting lucene.searchBlockSize equal to maxIdsInQuery, so that all Lucene results can be authorised at once +# If lucene.searchBlockSize > maxIdsInQuery, then multiple auth checks may be needed for a single search to Lucene +# The optimal value depends on how likely a user's auth request fails: larger values are more efficient when rejection is more likely +lucene.searchBlockSize = 1000 lucene.directory = ${HOME}/data/lucene lucene.backlogHandlerIntervalSeconds = 60 lucene.enqueuedRequestIntervalSeconds = 3 diff --git a/src/test/java/org/icatproject/core/manager/TestLucene.java b/src/test/java/org/icatproject/core/manager/TestLucene.java index 216aa4c2..82d90c74 100644 --- a/src/test/java/org/icatproject/core/manager/TestLucene.java +++ b/src/test/java/org/icatproject/core/manager/TestLucene.java @@ -159,7 +159,7 @@ private void checkLsr(LuceneSearchResult lsr, Long... n) { Set got = new HashSet<>(); for (ScoredEntityBaseBean q : lsr.getResults()) { - got.add(q.getEntityBaseBeanId()); + got.add(q.getId()); } Set missing = new HashSet<>(wanted); diff --git a/src/test/scripts/prepare_test.py b/src/test/scripts/prepare_test.py index dbd8cf46..8cb6f39a 100644 --- a/src/test/scripts/prepare_test.py +++ b/src/test/scripts/prepare_test.py @@ -41,6 +41,7 @@ "log.list = SESSION WRITE READ INFO", "lucene.url = %s" % lucene_url, "lucene.populateBlockSize = 10000", + "lucene.searchBlockSize = 1000", "lucene.directory = %s/data/lucene" % subst["HOME"], "lucene.backlogHandlerIntervalSeconds = 60", "lucene.enqueuedRequestIntervalSeconds = 3",