Skip to content

Commit

Permalink
fix queries w.r.t. minorversion ordering
Browse files Browse the repository at this point in the history
  • Loading branch information
qqmyers committed Sep 19, 2024
1 parent 740b18d commit efb9005
Showing 1 changed file with 13 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -168,33 +168,20 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) {
}
}

// Note that this SQL line in the code below:
// datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))
// behaves somewhat counter-intuitively if the versionnumber and/or
// minorversionnumber is/are NULL - it results in an empty string
// (NOT the string "{dataset_id}:", in other words). Some harvested
// versions do not have version numbers (only the ones harvested from
// other Dataverses!) It works fine
// for our purposes below, because we are simply counting the selected
// lines - i.e. we don't care if some of these lines are empty.
// But do not use this notation if you need the values returned to
// meaningfully identify the datasets!


Query query = em.createNativeQuery(


"select count(*)\n"
+ "from (\n"
+ "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n"
+ "select DISTINCT ON (datasetversion.dataset_id) datasetversion.dataset_id \n"
+ "from datasetversion\n"
+ "join dataset on dataset.id = datasetversion.dataset_id\n"
+ ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
+ "where versionstate='RELEASED' \n"
+ ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ")
+ "and \n"
+ dataLocationLine // be careful about adding more and statements after this line.
+ "group by dataset_id \n"
+ " order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc\n"
+") sub_temp"
);
logger.log(Level.FINE, "Metric query: {0}", query);
Expand All @@ -207,15 +194,15 @@ public List<Object[]> datasetsBySubjectToMonth(String yyyymm, String dataLocatio
// A published local datasets may have more than one released version!
// So that's why we have to jump through some extra hoops below
// in order to select the latest one:
String originClause = "(datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in\n" +
String originClause = "(datasetversion.id in\n" +
"(\n" +
"select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" +
"select DISTINCT ON (datasetversion.dataset_id) datasetversion.id\n" +
" from datasetversion\n" +
" join dataset on dataset.id = datasetversion.dataset_id\n" +
" where versionstate='RELEASED'\n" +
" and dataset.harvestingclient_id is null\n" +
" and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" +
" group by dataset_id\n" +
" order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc\n" +
"))\n";

if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL
Expand Down Expand Up @@ -273,15 +260,15 @@ public long datasetsPastDays(int days, String dataLocation, Dataverse d) {
Query query = em.createNativeQuery(
"select count(*)\n"
+ "from (\n"
+ "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max\n"
+ "select DISTINCT ON (datasetversion.dataset_id) datasetversion.id\n"
+ "from datasetversion\n"
+ "join dataset on dataset.id = datasetversion.dataset_id\n"
+ ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
+ "where versionstate='RELEASED' \n"
+ ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n")
+ "and \n"
+ dataLocationLine // be careful about adding more and statements after this line.
+ "group by dataset_id \n"
+ " order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc \n"
+") sub_temp"
);
logger.log(Level.FINE, "Metric query: {0}", query);
Expand Down Expand Up @@ -322,17 +309,17 @@ public long filesToMonth(String yyyymm, Dataverse d) {
+ "select count(*)\n"
+ "from filemetadata\n"
+ "join datasetversion on datasetversion.id = filemetadata.datasetversion_id\n"
+ "where datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in \n"
+ "where datasetversion.id in \n"
+ "(\n"
+ "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n"
+ "select DISTINCT ON (datasetversion.dataset_id) datasetversion.id \n"
+ "from datasetversion\n"
+ "join dataset on dataset.id = datasetversion.dataset_id\n"
+ ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
+ "where versionstate='RELEASED'\n"
+ ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n")
+ "and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n"
+ "and dataset.harvestingclient_id is null\n"
+ "group by dataset_id \n"
+ "order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber \n"
+ ");"
);
logger.log(Level.FINE, "Metric query: {0}", query);
Expand All @@ -345,17 +332,17 @@ public long filesPastDays(int days, Dataverse d) {
+ "select count(*)\n"
+ "from filemetadata\n"
+ "join datasetversion on datasetversion.id = filemetadata.datasetversion_id\n"
+ "where datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in \n"
+ "where datasetversion.id in \n"
+ "(\n"
+ "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n"
+ "select DISTINCT ON (datasetversion.dataset_id) datasetversion.id \n"
+ "from datasetversion\n"
+ "join dataset on dataset.id = datasetversion.dataset_id\n"
+ ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n")
+ "where versionstate='RELEASED'\n"
+ "and releasetime > current_date - interval '" + days + "' day\n"
+ ((d == null) ? "" : "AND dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n")
+ "and dataset.harvestingclient_id is null\n"
+ "group by dataset_id \n"
+ "order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc \n"
+ ");"
);
logger.log(Level.FINE, "Metric query: {0}", query);
Expand Down

0 comments on commit efb9005

Please sign in to comment.