From 4710a7472f728c9e9bcebd1c25001fddbe3866ec Mon Sep 17 00:00:00 2001 From: Marios Trivyzas Date: Thu, 31 Jan 2019 16:33:05 +0200 Subject: [PATCH] SQL: Implement FIRST/LAST aggregate functions (#37936) FIRST and LAST can be used with one argument and work similarly to MIN and MAX but they are implemented using a Top Hits aggregation and therefore can also operate on keyword fields. When a second argument is provided then they return the first/last value of the first arg when its values are ordered ascending/descending (respectively) by the values of the second argument. Currently because of the usage of a Top Hits aggregation FIRST and LAST cannot be used in the HAVING clause of a GROUP BY query to filter on the results of the aggregation. Closes: #35639 --- docs/reference/sql/functions/aggs.asciidoc | 198 ++++++++++++++++++ docs/reference/sql/limitations.asciidoc | 7 + .../xpack/sql/qa/cli/ShowTestCase.java | 6 + .../sql/qa/src/main/resources/agg.csv-spec | 73 +++++++ .../qa/src/main/resources/command.csv-spec | 8 +- .../sql/qa/src/main/resources/docs.csv-spec | 143 ++++++++++++- .../xpack/sql/analysis/analyzer/Verifier.java | 44 +++- .../xpack/sql/execution/search/Querier.java | 7 + .../search/extractor/BucketExtractors.java | 1 + .../search/extractor/TopHitsAggExtractor.java | 94 +++++++++ .../xpack/sql/expression/Foldables.java | 8 - .../expression/function/FunctionRegistry.java | 10 +- .../expression/function/aggregate/First.java | 35 ++++ .../expression/function/aggregate/Last.java | 35 ++++ .../expression/function/aggregate/Max.java | 6 +- .../expression/function/aggregate/Min.java | 6 +- .../function/aggregate/TopHits.java | 67 ++++++ .../xpack/sql/optimizer/Optimizer.java | 43 +++- .../xpack/sql/planner/QueryFolder.java | 8 +- .../xpack/sql/planner/QueryTranslator.java | 47 ++++- .../xpack/sql/querydsl/agg/Aggs.java | 28 +-- .../xpack/sql/querydsl/agg/TopHitsAgg.java | 79 +++++++ .../querydsl/container/QueryContainer.java | 4 +- .../querydsl/container/SearchHitFieldRef.java | 3 +- .../sql/querydsl/container/TopHitsAggRef.java | 38 ++++ .../xpack/sql/type/DataType.java | 8 +- .../xpack/sql/type/DateEsField.java | 29 +-- .../elasticsearch/xpack/sql/type/Types.java | 7 +- .../xpack/sql/util/DateUtils.java | 1 + .../analyzer/VerifierErrorMessagesTests.java | 48 ++++- .../extractor/TopHitsAggExtractorTests.java | 89 ++++++++ .../xpack/sql/optimizer/OptimizerTests.java | 53 +++++ .../sql/planner/QueryTranslatorTests.java | 57 +++++ .../xpack/sql/type/TypesTests.java | 10 - 34 files changed, 1201 insertions(+), 99 deletions(-) create mode 100644 x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/extractor/TopHitsAggExtractor.java create mode 100644 x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/First.java create mode 100644 x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Last.java create mode 100644 x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/TopHits.java create mode 100644 x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/TopHitsAgg.java create mode 100644 x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/TopHitsAggRef.java create mode 100644 x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/execution/search/extractor/TopHitsAggExtractorTests.java diff --git a/docs/reference/sql/functions/aggs.asciidoc b/docs/reference/sql/functions/aggs.asciidoc index b23b4ebbc9af4..152d47715250d 100644 --- a/docs/reference/sql/functions/aggs.asciidoc +++ b/docs/reference/sql/functions/aggs.asciidoc @@ -113,6 +113,196 @@ Returns the total number of _distinct non-null_ values in input values. include-tagged::{sql-specs}/docs.csv-spec[aggCountDistinct] -------------------------------------------------- +[[sql-functions-aggs-first]] +===== `FIRST/FIRST_VALUE` + +.Synopsis: +[source, sql] +---------------------------------------------- +FIRST(field_name<1>[, ordering_field_name]<2>) +---------------------------------------------- + +*Input*: + +<1> target field for the aggregation +<2> optional field used for ordering + +*Output*: same type as the input + +.Description: + +Returns the first **non-NULL** value (if such exists) of the `field_name` input column sorted by +the `ordering_field_name` column. If `ordering_field_name` is not provided, only the `field_name` +column is used for the sorting. E.g.: + +[cols="<,<"] +|=== +s| a | b + + | 100 | 1 + | 200 | 1 + | 1 | 2 + | 2 | 2 + | 10 | null + | 20 | null + | null | null +|=== + +[source, sql] +---------------------- +SELECT FIRST(a) FROM t +---------------------- + +will result in: +[cols="<"] +|=== +s| FIRST(a) + | 1 +|=== + +and + +[source, sql] +------------------------- +SELECT FIRST(a, b) FROM t +------------------------- + +will result in: +[cols="<"] +|=== +s| FIRST(a, b) + | 100 +|=== + + +["source","sql",subs="attributes,macros"] +----------------------------------------------------------- +include-tagged::{sql-specs}/docs.csv-spec[firstWithOneArg] +----------------------------------------------------------- + +["source","sql",subs="attributes,macros"] +-------------------------------------------------------------------- +include-tagged::{sql-specs}/docs.csv-spec[firstWithOneArgAndGroupBy] +-------------------------------------------------------------------- + +["source","sql",subs="attributes,macros"] +----------------------------------------------------------- +include-tagged::{sql-specs}/docs.csv-spec[firstWithTwoArgs] +----------------------------------------------------------- + +["source","sql",subs="attributes,macros"] +--------------------------------------------------------------------- +include-tagged::{sql-specs}/docs.csv-spec[firstWithTwoArgsAndGroupBy] +--------------------------------------------------------------------- + +`FIRST_VALUE` is a name alias and can be used instead of `FIRST`, e.g.: + +["source","sql",subs="attributes,macros"] +-------------------------------------------------------------------------- +include-tagged::{sql-specs}/docs.csv-spec[firstValueWithTwoArgsAndGroupBy] +-------------------------------------------------------------------------- + +[NOTE] +`FIRST` cannot be used in a HAVING clause. +[NOTE] +`FIRST` cannot be used with columns of type <> unless +the field is also <>. + +[[sql-functions-aggs-last]] +===== `LAST/LAST_VALUE` + +.Synopsis: +[source, sql] +-------------------------------------------------- +LAST(field_name<1>[, ordering_field_name]<2>) +-------------------------------------------------- + +*Input*: + +<1> target field for the aggregation +<2> optional field used for ordering + +*Output*: same type as the input + +.Description: + +It's the inverse of <>. Returns the last **non-NULL** value (if such exists) of the +`field_name`input column sorted descending by the `ordering_field_name` column. If `ordering_field_name` is not +provided, only the `field_name` column is used for the sorting. E.g.: + +[cols="<,<"] +|=== +s| a | b + + | 10 | 1 + | 20 | 1 + | 1 | 2 + | 2 | 2 + | 100 | null + | 200 | null + | null | null +|=== + +[source, sql] +------------------------ +SELECT LAST(a) FROM t +------------------------ + +will result in: +[cols="<"] +|=== +s| LAST(a) + | 200 +|=== + +and + +[source, sql] +------------------------ +SELECT LAST(a, b) FROM t +------------------------ + +will result in: +[cols="<"] +|=== +s| LAST(a, b) + | 2 +|=== + + +["source","sql",subs="attributes,macros"] +----------------------------------------------------------- +include-tagged::{sql-specs}/docs.csv-spec[lastWithOneArg] +----------------------------------------------------------- + +["source","sql",subs="attributes,macros"] +------------------------------------------------------------------- +include-tagged::{sql-specs}/docs.csv-spec[lastWithOneArgAndGroupBy] +------------------------------------------------------------------- + +["source","sql",subs="attributes,macros"] +----------------------------------------------------------- +include-tagged::{sql-specs}/docs.csv-spec[lastWithTwoArgs] +----------------------------------------------------------- + +["source","sql",subs="attributes,macros"] +-------------------------------------------------------------------- +include-tagged::{sql-specs}/docs.csv-spec[lastWithTwoArgsAndGroupBy] +-------------------------------------------------------------------- + +`LAST_VALUE` is a name alias and can be used instead of `LAST`, e.g.: + +["source","sql",subs="attributes,macros"] +------------------------------------------------------------------------- +include-tagged::{sql-specs}/docs.csv-spec[lastValueWithTwoArgsAndGroupBy] +------------------------------------------------------------------------- + +[NOTE] +`LAST` cannot be used in `HAVING` clause. +[NOTE] +`LAST` cannot be used with columns of type <> unless +the field is also <>. + [[sql-functions-aggs-max]] ===== `MAX` @@ -137,6 +327,10 @@ Returns the maximum value across input values in the field `field_name`. include-tagged::{sql-specs}/docs.csv-spec[aggMax] -------------------------------------------------- +[NOTE] +`MAX` on a field of type <> or <> is translated into +<> and therefore, it cannot be used in `HAVING` clause. + [[sql-functions-aggs-min]] ===== `MIN` @@ -161,6 +355,10 @@ Returns the minimum value across input values in the field `field_name`. include-tagged::{sql-specs}/docs.csv-spec[aggMin] -------------------------------------------------- +[NOTE] +`MIN` on a field of type <> or <> is translated into +<> and therefore, it cannot be used in `HAVING` clause. + [[sql-functions-aggs-sum]] ===== `SUM` diff --git a/docs/reference/sql/limitations.asciidoc b/docs/reference/sql/limitations.asciidoc index 33a0859a7fda1..39b7c191131ff 100644 --- a/docs/reference/sql/limitations.asciidoc +++ b/docs/reference/sql/limitations.asciidoc @@ -90,3 +90,10 @@ include-tagged::{sql-specs}/docs.csv-spec[limitationSubSelectRewritten] But, if the sub-select would include a `GROUP BY` or `HAVING` or the enclosing `SELECT` would be more complex than `SELECT X FROM (SELECT ...) WHERE [simple_condition]`, this is currently **un-supported**. + +[float] +=== Use <>/<> aggregation functions in `HAVING` clause + +Using `FIRST` and `LAST` in the `HAVING` clause is not supported. The same applies to +<> and <> when their target column +is of type <> as they are internally translated to `FIRST` and `LAST`. diff --git a/x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/cli/ShowTestCase.java b/x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/cli/ShowTestCase.java index cddbf4c10078b..382442b5bffec 100644 --- a/x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/cli/ShowTestCase.java +++ b/x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/cli/ShowTestCase.java @@ -31,6 +31,10 @@ public void testShowFunctions() throws IOException { assertThat(readLine(), containsString(HEADER_SEPARATOR)); assertThat(readLine(), RegexMatcher.matches("\\s*AVG\\s*\\|\\s*AGGREGATE\\s*")); assertThat(readLine(), RegexMatcher.matches("\\s*COUNT\\s*\\|\\s*AGGREGATE\\s*")); + assertThat(readLine(), RegexMatcher.matches("\\s*FIRST\\s*\\|\\s*AGGREGATE\\s*")); + assertThat(readLine(), RegexMatcher.matches("\\s*FIRST_VALUE\\s*\\|\\s*AGGREGATE\\s*")); + assertThat(readLine(), RegexMatcher.matches("\\s*LAST\\s*\\|\\s*AGGREGATE\\s*")); + assertThat(readLine(), RegexMatcher.matches("\\s*LAST_VALUE\\s*\\|\\s*AGGREGATE\\s*")); assertThat(readLine(), RegexMatcher.matches("\\s*MAX\\s*\\|\\s*AGGREGATE\\s*")); assertThat(readLine(), RegexMatcher.matches("\\s*MIN\\s*\\|\\s*AGGREGATE\\s*")); String line = readLine(); @@ -58,6 +62,8 @@ public void testShowFunctions() throws IOException { public void testShowFunctionsLikePrefix() throws IOException { assertThat(command("SHOW FUNCTIONS LIKE 'L%'"), RegexMatcher.matches("\\s*name\\s*\\|\\s*type\\s*")); assertThat(readLine(), containsString(HEADER_SEPARATOR)); + assertThat(readLine(), RegexMatcher.matches("\\s*LAST\\s*\\|\\s*AGGREGATE\\s*")); + assertThat(readLine(), RegexMatcher.matches("\\s*LAST_VALUE\\s*\\|\\s*AGGREGATE\\s*")); assertThat(readLine(), RegexMatcher.matches("\\s*LEAST\\s*\\|\\s*CONDITIONAL\\s*")); assertThat(readLine(), RegexMatcher.matches("\\s*LOG\\s*\\|\\s*SCALAR\\s*")); assertThat(readLine(), RegexMatcher.matches("\\s*LOG10\\s*\\|\\s*SCALAR\\s*")); diff --git a/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec index 1cf3acdcfa4a6..7cc8e6a6ef80a 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec @@ -373,3 +373,76 @@ SELECT COUNT(ALL last_name)=COUNT(ALL first_name) AS areEqual, COUNT(ALL first_n ---------------+---------------+--------------- false |90 |100 ; + +topHitsWithOneArgAndGroupBy +schema::gender:s|first:s|last:s +SELECT gender, FIRST(first_name) as first, LAST(first_name) as last FROM test_emp GROUP BY gender ORDER BY gender; + + gender | first | last +---------------+---------------+--------------- +null | Berni | Patricio +F | Alejandro | Xinglin +M | Amabile | Zvonko +; + +topHitsWithTwoArgsAndGroupBy +schema::gender:s|first:s|last:s +SELECT gender, FIRST(first_name, birth_date) as first, LAST(first_name, birth_date) as last FROM test_emp GROUP BY gender ORDER BY gender; + + gender | first | last +---------------+---------------+--------------- +null | Lillian | Eberhardt +F | Sumant | Valdiodio +M | Remzi | Hilari +; + +topHitsWithTwoArgsAndGroupByWithNullsOnTargetField +schema::gender:s|first:s|last:s +SELECT gender, FIRST(first_name, birth_date) AS first, LAST(first_name, birth_date) AS last FROM test_emp WHERE emp_no BETWEEN 10025 AND 10035 GROUP BY gender ORDER BY gender; + + gender | first | last +---------------+---------------+--------------- +F | null | Divier +M | null | Domenick +; + +topHitsWithTwoArgsAndGroupByWithNullsOnSortingField +schema::gender:s|first:s|last:s +SELECT gender, FIRST(first_name, birth_date) AS first, LAST(first_name, birth_date) AS last FROM test_emp WHERE emp_no BETWEEN 10047 AND 10052 GROUP BY gender ORDER BY gender; + + gender | first | last +---------------+---------------+--------------- +F | Basil | Basil +M | Hidefumi | Heping +; + +topHitsWithTwoArgsAndGroupByWithNullsOnTargetAndSortingField +schema::gender:s|first:s|last:s +SELECT gender, FIRST(first_name, birth_date) AS first, LAST(first_name, birth_date) AS last FROM test_emp WHERE emp_no BETWEEN 10037 AND 10052 GROUP BY gender ORDER BY gender; + + gender | first | last +---------------+-------------+----------------- +F | Basil | Weiyi +M | Hidefumi | null +; + +topHitsWithTwoArgsAndGroupByWithAllNullsOnTargetField +schema::gender:s|first:s|last:s +SELECT gender, FIRST(first_name, birth_date) AS first, LAST(first_name, birth_date) AS last FROM test_emp WHERE emp_no BETWEEN 10030 AND 10037 GROUP BY gender ORDER BY gender; + + gender | first | last +---------------+---------------+--------------- +F | null | null +M | null | null +; + +topHitsOnDatetime +schema::gender:s|first:i|last:i +SELECT gender, month(first(birth_date, languages)) first, month(last(birth_date, languages)) last FROM test_emp GROUP BY gender ORDER BY gender; + + gender | first | last +---------------+---------------+--------------- +null | 1 | 10 +F | 4 | 6 +M | 1 | 4 +; diff --git a/x-pack/plugin/sql/qa/src/main/resources/command.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/command.csv-spec index e23ee39c54610..c7ebf9420c9a0 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/command.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/command.csv-spec @@ -8,8 +8,12 @@ SHOW FUNCTIONS; name:s | type:s AVG |AGGREGATE -COUNT |AGGREGATE -MAX |AGGREGATE +COUNT |AGGREGATE +FIRST |AGGREGATE +FIRST_VALUE |AGGREGATE +LAST |AGGREGATE +LAST_VALUE |AGGREGATE +MAX |AGGREGATE MIN |AGGREGATE SUM |AGGREGATE KURTOSIS |AGGREGATE diff --git a/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec index 5c4f016d16459..46196f79b29d0 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/docs.csv-spec @@ -185,7 +185,11 @@ SHOW FUNCTIONS; name | type -----------------+--------------- AVG |AGGREGATE -COUNT |AGGREGATE +COUNT |AGGREGATE +FIRST |AGGREGATE +FIRST_VALUE |AGGREGATE +LAST |AGGREGATE +LAST_VALUE |AGGREGATE MAX |AGGREGATE MIN |AGGREGATE SUM |AGGREGATE @@ -699,6 +703,8 @@ SELECT MIN(salary) AS min, MAX(salary) AS max FROM emp HAVING min > 25000; // end::groupByHavingImplicitNoMatch //; + + /////////////////////////////// // // Grouping @@ -998,6 +1004,141 @@ SELECT COUNT(DISTINCT hire_date) unique_hires, COUNT(hire_date) AS hires FROM em // end::aggCountDistinct ; +firstWithOneArg +schema::FIRST(first_name):s +// tag::firstWithOneArg +SELECT FIRST(first_name) FROM emp; + + FIRST(first_name) +-------------------- +Alejandro + +// end::firstWithOneArg +; + +firstWithOneArgAndGroupBy +schema::gender:s|FIRST(first_name):s +// tag::firstWithOneArgAndGroupBy +SELECT gender, FIRST(first_name) FROM emp GROUP BY gender ORDER BY gender; + + gender | FIRST(first_name) +------------+-------------------- +null | Berni +F | Alejandro +M | Amabile + +// end::firstWithOneArgAndGroupBy +; + +firstWithTwoArgs +schema::FIRST(first_name, birth_date):s +// tag::firstWithTwoArgs +SELECT FIRST(first_name, birth_date) FROM emp; + + FIRST(first_name, birth_date) +-------------------------------- +Remzi + +// end::firstWithTwoArgs +; + +firstWithTwoArgsAndGroupBy +schema::gender:s|FIRST(first_name, birth_date):s +// tag::firstWithTwoArgsAndGroupBy +SELECT gender, FIRST(first_name, birth_date) FROM emp GROUP BY gender ORDER BY gender; + + gender | FIRST(first_name, birth_date) +--------------+-------------------------------- +null | Lillian +F | Sumant +M | Remzi + +// end::firstWithTwoArgsAndGroupBy +; + +firstValueWithTwoArgsAndGroupBy +schema::gender:s|FIRST_VALUE(first_name, birth_date):s +// tag::firstValueWithTwoArgsAndGroupBy +SELECT gender, FIRST_VALUE(first_name, birth_date) FROM emp GROUP BY gender ORDER BY gender; + + gender | FIRST_VALUE(first_name, birth_date) +--------------+-------------------------------------- +null | Lillian +F | Sumant +M | Remzi + +// end::firstValueWithTwoArgsAndGroupBy +; + +lastWithOneArg +schema::LAST(first_name):s +// tag::lastWithOneArg +SELECT LAST(first_name) FROM emp; + + LAST(first_name) +------------------- +Zvonko + +// end::lastWithOneArg +; + + +lastWithOneArgAndGroupBy +schema::gender:s|LAST(first_name):s +// tag::lastWithOneArgAndGroupBy +SELECT gender, LAST(first_name) FROM emp GROUP BY gender ORDER BY gender; + + gender | LAST(first_name) +------------+------------------- +null | Patricio +F | Xinglin +M | Zvonko + +// end::lastWithOneArgAndGroupBy +; + + +lastWithTwoArgs +schema::LAST(first_name, birth_date):s +// tag::lastWithTwoArgs +SELECT LAST(first_name, birth_date) FROM emp; + + LAST(first_name, birth_date) +------------------------------- +Hilari + +// end::lastWithTwoArgs +; + +lastWithTwoArgsAndGroupBy +schema::gender:s|LAST(first_name, birth_date):s +// tag::lastWithTwoArgsAndGroupBy +SELECT gender, LAST(first_name, birth_date) FROM emp GROUP BY gender ORDER BY gender; + + gender | LAST(first_name, birth_date) +-----------+------------------------------- +null | Eberhardt +F | Valdiodio +M | Hilari + +// end::lastWithTwoArgsAndGroupBy +; + +lastValueWithTwoArgsAndGroupBy +schema::gender:s|LAST_VALUE(first_name, birth_date):s +// tag::lastValueWithTwoArgsAndGroupBy +SELECT gender, LAST_VALUE(first_name, birth_date) FROM emp GROUP BY gender ORDER BY gender; + + gender | LAST_VALUE(first_name, birth_date) +-----------+------------------------------------- +null | Eberhardt +F | Valdiodio +M | Hilari + +// end::lastValueWithTwoArgsAndGroupBy +; + + aggMax // tag::aggMax SELECT MAX(salary) AS max FROM emp; diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java index 3f363d5a92809..fc7e97f1b77b7 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/analysis/analyzer/Verifier.java @@ -18,7 +18,11 @@ import org.elasticsearch.xpack.sql.expression.function.FunctionAttribute; import org.elasticsearch.xpack.sql.expression.function.Functions; import org.elasticsearch.xpack.sql.expression.function.Score; +import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunctionAttribute; +import org.elasticsearch.xpack.sql.expression.function.aggregate.Max; +import org.elasticsearch.xpack.sql.expression.function.aggregate.Min; +import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits; import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute; import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunction; import org.elasticsearch.xpack.sql.expression.predicate.conditional.ConditionalFunction; @@ -66,7 +70,7 @@ */ public final class Verifier { private final Metrics metrics; - + public Verifier(Metrics metrics) { this.metrics = metrics; } @@ -366,16 +370,26 @@ private static boolean checkGroupByHaving(LogicalPlan p, Set localFailu if (f.child() instanceof Aggregate) { Aggregate a = (Aggregate) f.child(); - Map> missing = new LinkedHashMap<>(); + Set missing = new LinkedHashSet<>(); + Set unsupported = new LinkedHashSet<>(); Expression condition = f.condition(); // variation of checkGroupMatch customized for HAVING, which requires just aggregations - condition.collectFirstChildren(c -> checkGroupByHavingHasOnlyAggs(c, condition, missing, functions)); + condition.collectFirstChildren(c -> checkGroupByHavingHasOnlyAggs(c, missing, unsupported, functions)); if (!missing.isEmpty()) { String plural = missing.size() > 1 ? "s" : StringUtils.EMPTY; localFailures.add( fail(condition, "Cannot use HAVING filter on non-aggregate" + plural + " %s; use WHERE instead", - Expressions.names(missing.keySet()))); + Expressions.names(missing))); + groupingFailures.add(a); + return false; + } + + if (!unsupported.isEmpty()) { + String plural = unsupported.size() > 1 ? "s" : StringUtils.EMPTY; + localFailures.add( + fail(condition, "HAVING filter is unsupported for function" + plural + " %s", + Expressions.names(unsupported))); groupingFailures.add(a); return false; } @@ -385,8 +399,8 @@ private static boolean checkGroupByHaving(LogicalPlan p, Set localFailu } - private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Node source, - Map> missing, Map functions) { + private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Set missing, + Set unsupported, Map functions) { // resolve FunctionAttribute to backing functions if (e instanceof FunctionAttribute) { @@ -407,14 +421,24 @@ private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Node sourc // unwrap function to find the base for (Expression arg : sf.arguments()) { - arg.collectFirstChildren(c -> checkGroupByHavingHasOnlyAggs(c, source, missing, functions)); + arg.collectFirstChildren(c -> checkGroupByHavingHasOnlyAggs(c, missing, unsupported, functions)); } return true; } else if (e instanceof Score) { - // Score can't be used for having - missing.put(e, source); + // Score can't be used in having + unsupported.add(e); + return true; + } else if (e instanceof TopHits) { + // First and Last cannot be used in having + unsupported.add(e); return true; + } else if (e instanceof Min || e instanceof Max) { + if (((AggregateFunction) e).field().dataType().isString()) { + // Min & Max on a Keyword field will be translated to First & Last respectively + unsupported.add(e); + return true; + } } // skip literals / foldable @@ -428,7 +452,7 @@ private static boolean checkGroupByHavingHasOnlyAggs(Expression e, Node sourc // left without leaves which have to match; that's a failure since everything should be based on an agg if (e instanceof Attribute) { - missing.put(e, source); + missing.add(e); return true; } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/Querier.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/Querier.java index ff02ed85818fe..6604b5c26d893 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/Querier.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/Querier.java @@ -32,6 +32,7 @@ import org.elasticsearch.xpack.sql.execution.search.extractor.FieldHitExtractor; import org.elasticsearch.xpack.sql.execution.search.extractor.HitExtractor; import org.elasticsearch.xpack.sql.execution.search.extractor.MetricAggExtractor; +import org.elasticsearch.xpack.sql.execution.search.extractor.TopHitsAggExtractor; import org.elasticsearch.xpack.sql.expression.gen.pipeline.AggExtractorInput; import org.elasticsearch.xpack.sql.expression.gen.pipeline.AggPathInput; import org.elasticsearch.xpack.sql.expression.gen.pipeline.HitExtractorInput; @@ -45,6 +46,7 @@ import org.elasticsearch.xpack.sql.querydsl.container.QueryContainer; import org.elasticsearch.xpack.sql.querydsl.container.ScriptFieldRef; import org.elasticsearch.xpack.sql.querydsl.container.SearchHitFieldRef; +import org.elasticsearch.xpack.sql.querydsl.container.TopHitsAggRef; import org.elasticsearch.xpack.sql.session.Configuration; import org.elasticsearch.xpack.sql.session.Rows; import org.elasticsearch.xpack.sql.session.SchemaRowSet; @@ -276,6 +278,11 @@ private BucketExtractor createExtractor(FieldExtraction ref, BucketExtractor tot return new MetricAggExtractor(r.name(), r.property(), r.innerKey()); } + if (ref instanceof TopHitsAggRef) { + TopHitsAggRef r = (TopHitsAggRef) ref; + return new TopHitsAggExtractor(r.name(), r.fieldDataType()); + } + if (ref == GlobalCountRef.INSTANCE) { return totalCount; } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/extractor/BucketExtractors.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/extractor/BucketExtractors.java index bae0590935fd7..221662b79c117 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/extractor/BucketExtractors.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/extractor/BucketExtractors.java @@ -24,6 +24,7 @@ public static List getNamedWriteables() { entries.add(new Entry(BucketExtractor.class, CompositeKeyExtractor.NAME, CompositeKeyExtractor::new)); entries.add(new Entry(BucketExtractor.class, ComputingExtractor.NAME, ComputingExtractor::new)); entries.add(new Entry(BucketExtractor.class, MetricAggExtractor.NAME, MetricAggExtractor::new)); + entries.add(new Entry(BucketExtractor.class, TopHitsAggExtractor.NAME, TopHitsAggExtractor::new)); entries.add(new Entry(BucketExtractor.class, ConstantExtractor.NAME, ConstantExtractor::new)); return entries; } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/extractor/TopHitsAggExtractor.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/extractor/TopHitsAggExtractor.java new file mode 100644 index 0000000000000..429ff2edfc984 --- /dev/null +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/extractor/TopHitsAggExtractor.java @@ -0,0 +1,94 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.sql.execution.search.extractor; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation.Bucket; +import org.elasticsearch.search.aggregations.metrics.InternalTopHits; +import org.elasticsearch.xpack.sql.SqlIllegalArgumentException; +import org.elasticsearch.xpack.sql.type.DataType; +import org.elasticsearch.xpack.sql.util.DateUtils; + +import java.io.IOException; +import java.util.Objects; + +public class TopHitsAggExtractor implements BucketExtractor { + + static final String NAME = "th"; + + private final String name; + private final DataType fieldDataType; + + public TopHitsAggExtractor(String name, DataType fieldDataType) { + this.name = name; + this.fieldDataType = fieldDataType; + } + + TopHitsAggExtractor(StreamInput in) throws IOException { + name = in.readString(); + fieldDataType = in.readEnum(DataType.class); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(name); + out.writeEnum(fieldDataType); + } + + String name() { + return name; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public Object extract(Bucket bucket) { + InternalTopHits agg = bucket.getAggregations().get(name); + if (agg == null) { + throw new SqlIllegalArgumentException("Cannot find an aggregation named {}", name); + } + + if (agg.getHits().getTotalHits() == null || agg.getHits().getTotalHits().value == 0) { + return null; + } + + Object value = agg.getHits().getAt(0).getFields().values().iterator().next().getValue(); + if (fieldDataType.isDateBased()) { + return DateUtils.asDateTime(Long.parseLong(value.toString())); + } else { + return value; + } + } + + @Override + public int hashCode() { + return Objects.hash(name, fieldDataType); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + + if (obj == null || getClass() != obj.getClass()) { + return false; + } + + TopHitsAggExtractor other = (TopHitsAggExtractor) obj; + return Objects.equals(name, other.name) + && Objects.equals(fieldDataType, other.fieldDataType); + } + + @Override + public String toString() { + return "TopHits>" + name + "[" + fieldDataType + "]"; + } +} diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java index f0b8ab2f406b9..c7297edb126ad 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/Foldables.java @@ -32,18 +32,10 @@ public static Object valueOf(Expression e) { throw new SqlIllegalArgumentException("Cannot determine value for {}", e); } - public static String stringValueOf(Expression e) { - return valueOf(e, DataType.KEYWORD); - } - public static Integer intValueOf(Expression e) { return valueOf(e, DataType.INTEGER); } - public static Long longValueOf(Expression e) { - return valueOf(e, DataType.LONG); - } - public static double doubleValueOf(Expression e) { return valueOf(e, DataType.DOUBLE); } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/FunctionRegistry.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/FunctionRegistry.java index 060808e4cccd0..876af256294c8 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/FunctionRegistry.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/FunctionRegistry.java @@ -10,7 +10,9 @@ import org.elasticsearch.xpack.sql.expression.Expression; import org.elasticsearch.xpack.sql.expression.function.aggregate.Avg; import org.elasticsearch.xpack.sql.expression.function.aggregate.Count; +import org.elasticsearch.xpack.sql.expression.function.aggregate.First; import org.elasticsearch.xpack.sql.expression.function.aggregate.Kurtosis; +import org.elasticsearch.xpack.sql.expression.function.aggregate.Last; import org.elasticsearch.xpack.sql.expression.function.aggregate.Max; import org.elasticsearch.xpack.sql.expression.function.aggregate.Min; import org.elasticsearch.xpack.sql.expression.function.aggregate.Percentile; @@ -19,6 +21,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.StddevPop; import org.elasticsearch.xpack.sql.expression.function.aggregate.Sum; import org.elasticsearch.xpack.sql.expression.function.aggregate.SumOfSquares; +import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits; import org.elasticsearch.xpack.sql.expression.function.aggregate.VarPop; import org.elasticsearch.xpack.sql.expression.function.grouping.Histogram; import org.elasticsearch.xpack.sql.expression.function.scalar.Cast; @@ -143,6 +146,8 @@ private void defineDefaultFunctions() { // Aggregate functions addToMap(def(Avg.class, Avg::new, "AVG"), def(Count.class, Count::new, "COUNT"), + def(First.class, First::new, "FIRST", "FIRST_VALUE"), + def(Last.class, Last::new, "LAST", "LAST_VALUE"), def(Max.class, Max::new, "MAX"), def(Min.class, Min::new, "MIN"), def(Sum.class, Sum::new, "SUM")); @@ -480,7 +485,8 @@ interface DatetimeBinaryFunctionBuilder { static FunctionDefinition def(Class function, BinaryFunctionBuilder ctorRef, String... names) { FunctionBuilder builder = (source, children, distinct, cfg) -> { - boolean isBinaryOptionalParamFunction = function.isAssignableFrom(Round.class) || function.isAssignableFrom(Truncate.class); + boolean isBinaryOptionalParamFunction = function.isAssignableFrom(Round.class) || function.isAssignableFrom(Truncate.class) + || TopHits.class.isAssignableFrom(function); if (isBinaryOptionalParamFunction && (children.size() > 2 || children.size() < 1)) { throw new IllegalArgumentException("expects one or two arguments"); } else if (!isBinaryOptionalParamFunction && children.size() != 2) { @@ -583,4 +589,4 @@ private static FunctionDefinition def(Class function, private interface CastFunctionBuilder { T build(Source source, Expression expression, DataType dataType); } -} \ No newline at end of file +} diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/First.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/First.java new file mode 100644 index 0000000000000..527d299fa3c58 --- /dev/null +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/First.java @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.sql.expression.function.aggregate; + +import org.elasticsearch.xpack.sql.expression.Expression; +import org.elasticsearch.xpack.sql.tree.NodeInfo; +import org.elasticsearch.xpack.sql.tree.Source; + +import java.util.List; + +/** + * Find the first value of the field ordered by the 2nd argument (if provided) + */ +public class First extends TopHits { + + public First(Source source, Expression field, Expression sortField) { + super(source, field, sortField); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, First::new, field(), orderField()); + } + + @Override + public First replaceChildren(List newChildren) { + if (newChildren.size() > 2) { + throw new IllegalArgumentException("expected one or two children but received [" + newChildren.size() + "]"); + } + return new First(source(), newChildren.get(0), newChildren.size() > 1 ? newChildren.get(1) : null); + } +} diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Last.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Last.java new file mode 100644 index 0000000000000..93b893e510ceb --- /dev/null +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Last.java @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.sql.expression.function.aggregate; + +import org.elasticsearch.xpack.sql.expression.Expression; +import org.elasticsearch.xpack.sql.tree.NodeInfo; +import org.elasticsearch.xpack.sql.tree.Source; + +import java.util.List; + +/** + * Find the last value of the field ordered by the 2nd argument (if provided) + */ +public class Last extends TopHits { + + public Last(Source source, Expression field, Expression sortField) { + super(source, field, sortField); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, Last::new, field(), orderField()); + } + + @Override + public Last replaceChildren(List newChildren) { + if (newChildren.size() > 2) { + throw new IllegalArgumentException("expected one or two children but received [" + newChildren.size() + "]"); + } + return new Last(source(), newChildren.get(0), newChildren.size() > 1 ? newChildren.get(1) : null); + } +} diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Max.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Max.java index 8aa72dea7d1da..898c98463445e 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Max.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Max.java @@ -45,6 +45,10 @@ public String innerName() { @Override protected TypeResolution resolveType() { - return Expressions.typeMustBeNumericOrDate(field(), sourceText(), ParamOrdinal.DEFAULT); + if (field().dataType().isString()) { + return TypeResolution.TYPE_RESOLVED; + } else { + return Expressions.typeMustBeNumericOrDate(field(), sourceText(), ParamOrdinal.DEFAULT); + } } } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Min.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Min.java index bc3c5952f3896..8652759fca486 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Min.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/Min.java @@ -48,6 +48,10 @@ public String innerName() { @Override protected TypeResolution resolveType() { - return Expressions.typeMustBeNumericOrDate(field(), sourceText(), ParamOrdinal.DEFAULT); + if (field().dataType().isString()) { + return TypeResolution.TYPE_RESOLVED; + } else { + return Expressions.typeMustBeNumericOrDate(field(), sourceText(), ParamOrdinal.DEFAULT); + } } } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/TopHits.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/TopHits.java new file mode 100644 index 0000000000000..9cc8cccaa9778 --- /dev/null +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/expression/function/aggregate/TopHits.java @@ -0,0 +1,67 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.sql.expression.function.aggregate; + +import org.elasticsearch.xpack.sql.analysis.index.MappingException; +import org.elasticsearch.xpack.sql.expression.Expression; +import org.elasticsearch.xpack.sql.expression.Expressions; +import org.elasticsearch.xpack.sql.expression.FieldAttribute; +import org.elasticsearch.xpack.sql.tree.Source; +import org.elasticsearch.xpack.sql.type.DataType; + +import java.util.Collections; + +import static org.elasticsearch.common.logging.LoggerMessageFormat.format; + +/** + * Super class of Aggregation functions on field types other than numeric, that need to be + * translated into an ES {@link org.elasticsearch.search.aggregations.metrics.TopHits} aggregation. + */ +public abstract class TopHits extends AggregateFunction { + + TopHits(Source source, Expression field, Expression sortField) { + super(source, field, sortField != null ? Collections.singletonList(sortField) : Collections.emptyList()); + } + + public Expression orderField() { + return parameters().isEmpty() ? null : parameters().get(0); + } + + @Override + public DataType dataType() { + return field().dataType(); + } + + @Override + protected TypeResolution resolveType() { + if (field().foldable()) { + return new TypeResolution(format(null, "First argument of [{}] must be a table column, found constant [{}]", + functionName(), + Expressions.name(field()))); + } + try { + ((FieldAttribute) field()).exactAttribute(); + } catch (MappingException ex) { + return new TypeResolution(format(null, "[{}] cannot operate on first argument field of data type [{}]", + functionName(), field().dataType().esType)); + } + + if (orderField() != null) { + if (orderField().foldable()) { + return new TypeResolution(format(null, "Second argument of [{}] must be a table column, found constant [{}]", + functionName(), + Expressions.name(orderField()))); + } + try { + ((FieldAttribute) orderField()).exactAttribute(); + } catch (MappingException ex) { + return new TypeResolution(format(null, "[{}] cannot operate on second argument field of data type [{}]", + functionName(), orderField().dataType().esType)); + } + } + return TypeResolution.TYPE_RESOLVED; + } +} diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java index 38313fa613a0e..744ba041d357a 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/optimizer/Optimizer.java @@ -27,14 +27,19 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunctionAttribute; import org.elasticsearch.xpack.sql.expression.function.aggregate.ExtendedStats; import org.elasticsearch.xpack.sql.expression.function.aggregate.ExtendedStatsEnclosed; +import org.elasticsearch.xpack.sql.expression.function.aggregate.First; import org.elasticsearch.xpack.sql.expression.function.aggregate.InnerAggregate; +import org.elasticsearch.xpack.sql.expression.function.aggregate.Last; import org.elasticsearch.xpack.sql.expression.function.aggregate.MatrixStats; import org.elasticsearch.xpack.sql.expression.function.aggregate.MatrixStatsEnclosed; +import org.elasticsearch.xpack.sql.expression.function.aggregate.Max; +import org.elasticsearch.xpack.sql.expression.function.aggregate.Min; import org.elasticsearch.xpack.sql.expression.function.aggregate.Percentile; import org.elasticsearch.xpack.sql.expression.function.aggregate.PercentileRank; import org.elasticsearch.xpack.sql.expression.function.aggregate.PercentileRanks; import org.elasticsearch.xpack.sql.expression.function.aggregate.Percentiles; import org.elasticsearch.xpack.sql.expression.function.aggregate.Stats; +import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits; import org.elasticsearch.xpack.sql.expression.function.scalar.Cast; import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunction; import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunctionAttribute; @@ -120,7 +125,8 @@ protected Iterable.Batch> batches() { new ReplaceAggsWithStats(), new PromoteStatsToExtendedStats(), new ReplaceAggsWithPercentiles(), - new ReplaceAggsWithPercentileRanks() + new ReplaceAggsWithPercentileRanks(), + new ReplaceMinMaxWithTopHits() ); Batch operators = new Batch("Operator Optimization", @@ -622,6 +628,41 @@ protected LogicalPlan rule(LogicalPlan e) { } } + static class ReplaceMinMaxWithTopHits extends OptimizerRule { + + @Override + protected LogicalPlan rule(LogicalPlan plan) { + Map seen = new HashMap<>(); + return plan.transformExpressionsDown(e -> { + if (e instanceof Min) { + Min min = (Min) e; + if (min.field().dataType().isString()) { + TopHits topHits = seen.get(min.id()); + if (topHits != null) { + return topHits; + } + topHits = new First(min.source(), min.field(), null); + seen.put(min.id(), topHits); + return topHits; + } + } + if (e instanceof Max) { + Max max = (Max) e; + if (max.field().dataType().isString()) { + TopHits topHits = seen.get(max.id()); + if (topHits != null) { + return topHits; + } + topHits = new Last(max.source(), max.field(), null); + seen.put(max.id(), topHits); + return topHits; + } + } + return e; + }); + } + } + static class PruneFilters extends OptimizerRule { @Override diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java index da409439558c7..b7b294f05654f 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java @@ -23,6 +23,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.CompoundNumericAggregate; import org.elasticsearch.xpack.sql.expression.function.aggregate.Count; import org.elasticsearch.xpack.sql.expression.function.aggregate.InnerAggregate; +import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits; import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunction; import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunction; import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunctionAttribute; @@ -57,6 +58,7 @@ import org.elasticsearch.xpack.sql.querydsl.container.ScriptSort; import org.elasticsearch.xpack.sql.querydsl.container.Sort.Direction; import org.elasticsearch.xpack.sql.querydsl.container.Sort.Missing; +import org.elasticsearch.xpack.sql.querydsl.container.TopHitsAggRef; import org.elasticsearch.xpack.sql.querydsl.query.Query; import org.elasticsearch.xpack.sql.rule.Rule; import org.elasticsearch.xpack.sql.rule.RuleExecutor; @@ -422,7 +424,11 @@ private Tuple addAggFunction(GroupByKey groupingAg } else { LeafAgg leafAgg = toAgg(functionId, f); - aggInput = new AggPathInput(f, new MetricAggRef(leafAgg.id())); + if (f instanceof TopHits) { + aggInput = new AggPathInput(f, new TopHitsAggRef(leafAgg.id(), f.dataType())); + } else { + aggInput = new AggPathInput(f, new MetricAggRef(leafAgg.id())); + } queryC = queryC.with(queryC.aggs().addAgg(leafAgg)); } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryTranslator.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryTranslator.java index 1a5ceb686e609..de529b2e4ca61 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryTranslator.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryTranslator.java @@ -5,6 +5,7 @@ */ package org.elasticsearch.xpack.sql.planner; +import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.xpack.sql.SqlIllegalArgumentException; import org.elasticsearch.xpack.sql.expression.Attribute; import org.elasticsearch.xpack.sql.expression.Expression; @@ -21,6 +22,8 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.CompoundNumericAggregate; import org.elasticsearch.xpack.sql.expression.function.aggregate.Count; import org.elasticsearch.xpack.sql.expression.function.aggregate.ExtendedStats; +import org.elasticsearch.xpack.sql.expression.function.aggregate.First; +import org.elasticsearch.xpack.sql.expression.function.aggregate.Last; import org.elasticsearch.xpack.sql.expression.function.aggregate.MatrixStats; import org.elasticsearch.xpack.sql.expression.function.aggregate.Max; import org.elasticsearch.xpack.sql.expression.function.aggregate.Min; @@ -28,6 +31,7 @@ import org.elasticsearch.xpack.sql.expression.function.aggregate.Percentiles; import org.elasticsearch.xpack.sql.expression.function.aggregate.Stats; import org.elasticsearch.xpack.sql.expression.function.aggregate.Sum; +import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits; import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunction; import org.elasticsearch.xpack.sql.expression.function.grouping.Histogram; import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunction; @@ -76,6 +80,7 @@ import org.elasticsearch.xpack.sql.querydsl.agg.PercentilesAgg; import org.elasticsearch.xpack.sql.querydsl.agg.StatsAgg; import org.elasticsearch.xpack.sql.querydsl.agg.SumAgg; +import org.elasticsearch.xpack.sql.querydsl.agg.TopHitsAgg; import org.elasticsearch.xpack.sql.querydsl.query.BoolQuery; import org.elasticsearch.xpack.sql.querydsl.query.ExistsQuery; import org.elasticsearch.xpack.sql.querydsl.query.MatchQuery; @@ -138,7 +143,9 @@ private QueryTranslator(){} new PercentilesAggs(), new PercentileRanksAggs(), new CountAggs(), - new DateTimes() + new DateTimes(), + new Firsts(), + new Lasts() ); static class QueryTranslation { @@ -453,6 +460,17 @@ static String field(AggregateFunction af) { af.nodeString()); } + private static String topAggsField(AggregateFunction af, Expression e) { + if (e == null) { + return null; + } + if (e instanceof FieldAttribute) { + return ((FieldAttribute) e).exactAttribute().name(); + } + throw new SqlIllegalArgumentException("Does not know how to convert argument {} for function {}", e.nodeString(), + af.nodeString()); + } + // TODO: need to optimize on ngram // TODO: see whether escaping is needed static class Likes extends ExpressionTranslator { @@ -832,6 +850,24 @@ protected LeafAgg toAgg(String id, Min m) { } } + static class Firsts extends TopHitsAggTranslator { + + @Override + protected LeafAgg toAgg(String id, First f) { + return new TopHitsAgg(id, topAggsField(f, f.field()), f.dataType(), + topAggsField(f, f.orderField()), f.orderField() == null ? null : f.orderField().dataType(), SortOrder.ASC); + } + } + + static class Lasts extends TopHitsAggTranslator { + + @Override + protected LeafAgg toAgg(String id, Last l) { + return new TopHitsAgg(id, topAggsField(l, l.field()), l.dataType(), + topAggsField(l, l.orderField()), l.orderField() == null ? null : l.orderField().dataType(), SortOrder.DESC); + } + } + static class StatsAggs extends CompoundAggTranslator { @Override @@ -912,6 +948,15 @@ protected final LeafAgg asAgg(String id, C function) { protected abstract LeafAgg toAgg(String id, C f); } + abstract static class TopHitsAggTranslator extends AggTranslator { + + @Override + protected final LeafAgg asAgg(String id, C function) { + return toAgg(id, function); + } + + protected abstract LeafAgg toAgg(String id, C f); + } abstract static class ExpressionTranslator { diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/Aggs.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/Aggs.java index c7ab17670a212..18537fee87558 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/Aggs.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/Aggs.java @@ -54,17 +54,17 @@ protected GroupByKey copy(String id, String fieldName, ScriptTemplate script, Di }; private final List groups; - private final List metricAggs; + private final List simpleAggs; private final List pipelineAggs; public Aggs() { this(emptyList(), emptyList(), emptyList()); } - public Aggs(List groups, List metricAggs, List pipelineAggs) { + public Aggs(List groups, List simpleAggs, List pipelineAggs) { this.groups = groups; - this.metricAggs = metricAggs; + this.simpleAggs = simpleAggs; this.pipelineAggs = pipelineAggs; } @@ -75,7 +75,7 @@ public List groups() { public AggregationBuilder asAggBuilder() { AggregationBuilder rootGroup = null; - if (groups.isEmpty() && metricAggs.isEmpty()) { + if (groups.isEmpty() && simpleAggs.isEmpty()) { return null; } @@ -93,7 +93,7 @@ public AggregationBuilder asAggBuilder() { rootGroup = new FiltersAggregationBuilder(ROOT_GROUP_NAME, matchAllQuery()); } - for (LeafAgg agg : metricAggs) { + for (LeafAgg agg : simpleAggs) { rootGroup.subAggregation(agg.toBuilder()); } @@ -109,18 +109,18 @@ public boolean useImplicitGroupBy() { } public Aggs addGroups(Collection groups) { - return new Aggs(combine(this.groups, groups), metricAggs, pipelineAggs); + return new Aggs(combine(this.groups, groups), simpleAggs, pipelineAggs); } public Aggs addAgg(LeafAgg agg) { - if (metricAggs.contains(agg)) { + if (simpleAggs.contains(agg)) { return this; } - return new Aggs(groups, combine(metricAggs, agg), pipelineAggs); + return new Aggs(groups, combine(simpleAggs, agg), pipelineAggs); } public Aggs addAgg(PipelineAgg pipelineAgg) { - return new Aggs(groups, metricAggs, combine(pipelineAggs, pipelineAgg)); + return new Aggs(groups, simpleAggs, combine(pipelineAggs, pipelineAgg)); } public GroupByKey findGroupForAgg(String groupOrAggId) { @@ -131,7 +131,7 @@ public GroupByKey findGroupForAgg(String groupOrAggId) { } // maybe it's the default group agg ? - for (Agg agg : metricAggs) { + for (Agg agg : simpleAggs) { if (groupOrAggId.equals(agg.id())) { return IMPLICIT_GROUP_KEY; } @@ -153,12 +153,12 @@ public Aggs updateGroup(GroupByKey group) { } public Aggs with(List groups) { - return new Aggs(groups, metricAggs, pipelineAggs); + return new Aggs(groups, simpleAggs, pipelineAggs); } @Override public int hashCode() { - return Objects.hash(groups, metricAggs, pipelineAggs); + return Objects.hash(groups, simpleAggs, pipelineAggs); } @Override @@ -173,8 +173,8 @@ public boolean equals(Object obj) { Aggs other = (Aggs) obj; return Objects.equals(groups, other.groups) - && Objects.equals(metricAggs, other.metricAggs) + && Objects.equals(simpleAggs, other.simpleAggs) && Objects.equals(pipelineAggs, other.pipelineAggs); } -} \ No newline at end of file +} diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/TopHitsAgg.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/TopHitsAgg.java new file mode 100644 index 0000000000000..741d83aab5123 --- /dev/null +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/agg/TopHitsAgg.java @@ -0,0 +1,79 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.sql.querydsl.agg; + +import org.elasticsearch.search.aggregations.AggregationBuilder; +import org.elasticsearch.search.sort.FieldSortBuilder; +import org.elasticsearch.search.sort.SortBuilder; +import org.elasticsearch.search.sort.SortOrder; +import org.elasticsearch.xpack.sql.type.DataType; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.search.aggregations.AggregationBuilders.topHits; +import static org.elasticsearch.xpack.sql.querydsl.container.Sort.Missing.LAST; + +public class TopHitsAgg extends LeafAgg { + + private final String sortField; + private final SortOrder sortOrder; + private final DataType fieldDataType; + private final DataType sortFieldDataType; + + + public TopHitsAgg(String id, String fieldName, DataType fieldDataType, String sortField, + DataType sortFieldDataType, SortOrder sortOrder) { + super(id, fieldName); + this.sortField = sortField; + this.sortOrder = sortOrder; + this.fieldDataType = fieldDataType; + this.sortFieldDataType = sortFieldDataType; + } + + @Override + AggregationBuilder toBuilder() { + // Sort missing values (NULLs) as last to get the first/last non-null value + List> sortBuilderList = new ArrayList<>(2); + if (sortField != null) { + sortBuilderList.add( + new FieldSortBuilder(sortField) + .order(sortOrder) + .missing(LAST.position()) + .unmappedType(sortFieldDataType.esType)); + } + sortBuilderList.add( + new FieldSortBuilder(fieldName()) + .order(sortOrder) + .missing(LAST.position()) + .unmappedType(fieldDataType.esType)); + + return topHits(id()).docValueField(fieldName(), fieldDataType.format()).sorts(sortBuilderList).size(1); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + if (!super.equals(o)) { + return false; + } + TopHitsAgg that = (TopHitsAgg) o; + return Objects.equals(sortField, that.sortField) + && sortOrder == that.sortOrder + && fieldDataType == that.fieldDataType; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), sortField, sortOrder, fieldDataType); + } +} diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/QueryContainer.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/QueryContainer.java index fee8d0e942a3b..647cb70f1deac 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/QueryContainer.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/QueryContainer.java @@ -29,7 +29,6 @@ import org.elasticsearch.xpack.sql.querydsl.query.NestedQuery; import org.elasticsearch.xpack.sql.querydsl.query.Query; import org.elasticsearch.xpack.sql.tree.Source; -import org.elasticsearch.xpack.sql.type.DataType; import java.io.IOException; import java.util.AbstractMap; @@ -182,9 +181,8 @@ private Tuple nestedHitFieldRef(FieldAttribute List nestedRefs = new ArrayList<>(); String name = aliasName(attr); - String format = attr.field().getDataType() == DataType.DATETIME ? "epoch_millis" : null; Query q = rewriteToContainNestedField(query, attr.source(), - attr.nestedParent().name(), name, format, attr.field().isAggregatable()); + attr.nestedParent().name(), name, attr.field().getDataType().format(), attr.field().isAggregatable()); SearchHitFieldRef nestedFieldRef = new SearchHitFieldRef(name, attr.field().getDataType(), attr.field().isAggregatable(), attr.parent().name()); diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/SearchHitFieldRef.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/SearchHitFieldRef.java index 5e51b36fc3c32..e0233e1d1663c 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/SearchHitFieldRef.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/SearchHitFieldRef.java @@ -49,8 +49,7 @@ public void collectFields(SqlSourceBuilder sourceBuilder) { return; } if (docValue) { - String format = dataType == DataType.DATETIME ? "epoch_millis" : null; - sourceBuilder.addDocField(name, format); + sourceBuilder.addDocField(name, dataType.format()); } else { sourceBuilder.addSourceField(name); } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/TopHitsAggRef.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/TopHitsAggRef.java new file mode 100644 index 0000000000000..8e5eabec9b861 --- /dev/null +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/querydsl/container/TopHitsAggRef.java @@ -0,0 +1,38 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.sql.querydsl.container; + +import org.elasticsearch.xpack.sql.execution.search.AggRef; +import org.elasticsearch.xpack.sql.type.DataType; + +/** + * Reference to a TopHits aggregation. + * Since only one field is returned we only need its data type + */ +public class TopHitsAggRef extends AggRef { + + // only for readability via toString() + private final String name; + private final DataType fieldDataType; + + public TopHitsAggRef(String name, DataType fieldDataType) { + this.name = name; + this.fieldDataType = fieldDataType; + } + + public String name() { + return name; + } + + public DataType fieldDataType() { + return fieldDataType; + } + + @Override + public String toString() { + return ">" + name + "[" + fieldDataType.esType + "]"; + } +} diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java index 3210c9ceb8a8a..d2699692d746e 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/type/DataType.java @@ -5,6 +5,8 @@ */ package org.elasticsearch.xpack.sql.type; +import org.elasticsearch.xpack.sql.util.DateUtils; + import java.sql.JDBCType; import java.sql.SQLType; import java.sql.Types; @@ -72,7 +74,6 @@ public enum DataType { // @formatter:on private static final Map odbcToEs; - static { odbcToEs = new HashMap<>(36); @@ -123,6 +124,7 @@ public enum DataType { odbcToEs.put("SQL_INTERVAL_DAY_TO_SECOND", INTERVAL_DAY_TO_SECOND); } + /** * Elasticsearch type name */ @@ -240,4 +242,8 @@ public static DataType fromTypeName(String esType) { return DataType.UNSUPPORTED; } } + + public String format() { + return isDateBased() ? DateUtils.DATE_PARSE_FORMAT : null; + } } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/type/DateEsField.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/type/DateEsField.java index 71924adab5581..fc2d10448c152 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/type/DateEsField.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/type/DateEsField.java @@ -5,41 +5,14 @@ */ package org.elasticsearch.xpack.sql.type; -import org.elasticsearch.common.util.CollectionUtils; - -import java.util.Arrays; -import java.util.List; import java.util.Map; -import java.util.Objects; /** * SQL-related information about an index field with date type */ public class DateEsField extends EsField { - public static final List DEFAULT_FORMAT = Arrays.asList("strict_date_optional_time", "epoch_millis"); - private final List formats; - - public DateEsField(String name, Map properties, boolean hasDocValues, String... formats) { + public DateEsField(String name, Map properties, boolean hasDocValues) { super(name, DataType.DATETIME, properties, hasDocValues); - this.formats = CollectionUtils.isEmpty(formats) ? DEFAULT_FORMAT : Arrays.asList(formats); - } - - public List getFormats() { - return formats; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - if (!super.equals(o)) return false; - DateEsField dateField = (DateEsField) o; - return Objects.equals(formats, dateField.formats); - } - - @Override - public int hashCode() { - return Objects.hash(super.hashCode(), formats); } } diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/type/Types.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/type/Types.java index 0af0a5f322cc6..317d3ccb9176d 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/type/Types.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/type/Types.java @@ -87,12 +87,7 @@ private static void walkMapping(String name, Object value, Map field = new KeywordEsField(name, properties, docValues, length, normalized); break; case DATETIME: - Object fmt = content.get("format"); - if (fmt != null) { - field = new DateEsField(name, properties, docValues, Strings.delimitedListToStringArray(fmt.toString(), "||")); - } else { - field = new DateEsField(name, properties, docValues); - } + field = new DateEsField(name, properties, docValues); break; case UNSUPPORTED: String type = content.get("type").toString(); diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/util/DateUtils.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/util/DateUtils.java index bdd455fe10f63..deb7b9e9703c2 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/util/DateUtils.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/util/DateUtils.java @@ -27,6 +27,7 @@ public final class DateUtils { private static final DateTimeFormatter UTC_DATE_FORMATTER = ISODateTimeFormat.dateOptionalTimeParser().withZoneUTC(); public static final ZoneId UTC = ZoneId.of("Z"); + public static final String DATE_PARSE_FORMAT = "epoch_millis"; private DateUtils() {} diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java index 4279910e0e03b..0db1dc1c661c7 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/analysis/analyzer/VerifierErrorMessagesTests.java @@ -49,11 +49,15 @@ private String error(IndexResolution getIndexResult, String sql) { } private LogicalPlan accept(String sql) { - Map mapping = TypesTests.loadMapping("mapping-multi-field-with-nested.json"); - EsIndex test = new EsIndex("test", mapping); + EsIndex test = getTestEsIndex(); return accept(IndexResolution.valid(test), sql); } + private EsIndex getTestEsIndex() { + Map mapping = TypesTests.loadMapping("mapping-multi-field-with-nested.json"); + return new EsIndex("test", mapping); + } + private LogicalPlan accept(IndexResolution resolution, String sql) { Analyzer analyzer = new Analyzer(TestUtils.TEST_CFG, new FunctionRegistry(), resolution, new Verifier(new Metrics())); return analyzer.analyze(parser.createStatement(sql), true); @@ -382,11 +386,6 @@ public void testNotSupportedAggregateOnDate() { error("SELECT AVG(date) FROM test")); } - public void testNotSupportedAggregateOnString() { - assertEquals("1:8: [MAX(keyword)] argument must be [date, datetime or numeric], found value [keyword] type [keyword]", - error("SELECT MAX(keyword) FROM test")); - } - public void testInvalidTypeForStringFunction_WithOneArg() { assertEquals("1:8: [LENGTH] argument must be [string], found value [1] type [integer]", error("SELECT LENGTH(1)")); @@ -546,5 +545,40 @@ public void testErrorMessageForPercentileRankWithSecondArgBasedOnAField() { assertEquals("1:8: Second argument of PERCENTILE_RANK must be a constant, received [ABS(int)]", error("SELECT PERCENTILE_RANK(int, ABS(int)) FROM test")); } + + public void testTopHitsFirstArgConstant() { + assertEquals("1:8: First argument of [FIRST] must be a table column, found constant ['foo']", + error("SELECT FIRST('foo', int) FROM test")); + } + + public void testTopHitsSecondArgConstant() { + assertEquals("1:8: Second argument of [LAST] must be a table column, found constant [10]", + error("SELECT LAST(int, 10) FROM test")); + } + + public void testTopHitsFirstArgTextWithNoKeyword() { + assertEquals("1:8: [FIRST] cannot operate on first argument field of data type [text]", + error("SELECT FIRST(text) FROM test")); + } + + public void testTopHitsSecondArgTextWithNoKeyword() { + assertEquals("1:8: [LAST] cannot operate on second argument field of data type [text]", + error("SELECT LAST(keyword, text) FROM test")); + } + + public void testTopHitsGroupByHavingUnsupported() { + assertEquals("1:50: HAVING filter is unsupported for function [FIRST(int)]", + error("SELECT FIRST(int) FROM test GROUP BY text HAVING FIRST(int) > 10")); + } + + public void testMinOnKeywordGroupByHavingUnsupported() { + assertEquals("1:52: HAVING filter is unsupported for function [MIN(keyword)]", + error("SELECT MIN(keyword) FROM test GROUP BY text HAVING MIN(keyword) > 10")); + } + + public void testMaxOnKeywordGroupByHavingUnsupported() { + assertEquals("1:52: HAVING filter is unsupported for function [MAX(keyword)]", + error("SELECT MAX(keyword) FROM test GROUP BY text HAVING MAX(keyword) > 10")); + } } diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/execution/search/extractor/TopHitsAggExtractorTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/execution/search/extractor/TopHitsAggExtractorTests.java new file mode 100644 index 0000000000000..741fd5413be9c --- /dev/null +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/execution/search/extractor/TopHitsAggExtractorTests.java @@ -0,0 +1,89 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.sql.execution.search.extractor; + +import org.apache.lucene.search.TotalHits; +import org.elasticsearch.common.document.DocumentField; +import org.elasticsearch.common.io.stream.Writeable.Reader; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.SearchHits; +import org.elasticsearch.search.aggregations.Aggregation; +import org.elasticsearch.search.aggregations.Aggregations; +import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation.Bucket; +import org.elasticsearch.search.aggregations.metrics.InternalTopHits; +import org.elasticsearch.test.AbstractWireSerializingTestCase; +import org.elasticsearch.xpack.sql.SqlException; +import org.elasticsearch.xpack.sql.type.DataType; +import org.elasticsearch.xpack.sql.util.DateUtils; + +import java.util.Collections; + +import static java.util.Collections.emptyList; +import static java.util.Collections.emptyMap; +import static java.util.Collections.singletonList; + +public class TopHitsAggExtractorTests extends AbstractWireSerializingTestCase { + + public static TopHitsAggExtractor randomTopHitsAggExtractor() { + return new TopHitsAggExtractor(randomAlphaOfLength(16), randomFrom(DataType.values())); + } + + @Override + protected TopHitsAggExtractor createTestInstance() { + return randomTopHitsAggExtractor(); + } + + @Override + protected Reader instanceReader() { + return TopHitsAggExtractor::new; + } + + @Override + protected TopHitsAggExtractor mutateInstance(TopHitsAggExtractor instance) { + return new TopHitsAggExtractor(instance.name() + "mutated", randomFrom(DataType.values())); + } + + public void testNoAggs() { + Bucket bucket = new TestBucket(emptyMap(), 0, new Aggregations(emptyList())); + TopHitsAggExtractor extractor = randomTopHitsAggExtractor(); + SqlException exception = expectThrows(SqlException.class, () -> extractor.extract(bucket)); + assertEquals("Cannot find an aggregation named " + extractor.name(), exception.getMessage()); + } + + public void testZeroNullValue() { + TopHitsAggExtractor extractor = randomTopHitsAggExtractor(); + + TotalHits totalHits = new TotalHits(0, TotalHits.Relation.EQUAL_TO); + Aggregation agg = new InternalTopHits(extractor.name(), 0, 0, null, new SearchHits(null, totalHits, 0.0f), null, null); + Bucket bucket = new TestBucket(emptyMap(), 0, new Aggregations(singletonList(agg))); + assertNull(extractor.extract(bucket)); + } + + public void testExtractValue() { + TopHitsAggExtractor extractor = new TopHitsAggExtractor("topHitsAgg", DataType.KEYWORD); + + String value = "Str_Value"; + Aggregation agg = new InternalTopHits(extractor.name(), 0, 1, null, searchHitsOf(value), null, null); + Bucket bucket = new TestBucket(emptyMap(), 0, new Aggregations(singletonList(agg))); + assertEquals(value, extractor.extract(bucket)); + } + + public void testExtractDateValue() { + TopHitsAggExtractor extractor = new TopHitsAggExtractor("topHitsAgg", DataType.DATETIME); + + long value = 123456789L; + Aggregation agg = new InternalTopHits(extractor.name(), 0, 1, null, searchHitsOf(value), null, null); + Bucket bucket = new TestBucket(emptyMap(), 0, new Aggregations(singletonList(agg))); + assertEquals(DateUtils.asDateTime(value), extractor.extract(bucket)); + } + + private SearchHits searchHitsOf(Object value) { + TotalHits totalHits = new TotalHits(10, TotalHits.Relation.EQUAL_TO); + return new SearchHits(new SearchHit[] {new SearchHit(1, "docId", null, + Collections.singletonMap("topHitsAgg", new DocumentField("field", Collections.singletonList(value))))}, + totalHits, 0.0f); + } +} diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/optimizer/OptimizerTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/optimizer/OptimizerTests.java index 6873e4a107fb6..286524518e960 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/optimizer/OptimizerTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/optimizer/OptimizerTests.java @@ -20,6 +20,10 @@ import org.elasticsearch.xpack.sql.expression.function.Function; import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.sql.expression.function.aggregate.Count; +import org.elasticsearch.xpack.sql.expression.function.aggregate.First; +import org.elasticsearch.xpack.sql.expression.function.aggregate.Last; +import org.elasticsearch.xpack.sql.expression.function.aggregate.Max; +import org.elasticsearch.xpack.sql.expression.function.aggregate.Min; import org.elasticsearch.xpack.sql.expression.function.scalar.Cast; import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.DayName; import org.elasticsearch.xpack.sql.expression.function.scalar.datetime.DayOfMonth; @@ -75,7 +79,9 @@ import org.elasticsearch.xpack.sql.optimizer.Optimizer.PropagateEquals; import org.elasticsearch.xpack.sql.optimizer.Optimizer.PruneDuplicateFunctions; import org.elasticsearch.xpack.sql.optimizer.Optimizer.ReplaceFoldableAttributes; +import org.elasticsearch.xpack.sql.optimizer.Optimizer.ReplaceMinMaxWithTopHits; import org.elasticsearch.xpack.sql.optimizer.Optimizer.SimplifyConditional; +import org.elasticsearch.xpack.sql.plan.logical.Aggregate; import org.elasticsearch.xpack.sql.plan.logical.Filter; import org.elasticsearch.xpack.sql.plan.logical.LocalRelation; import org.elasticsearch.xpack.sql.plan.logical.LogicalPlan; @@ -1206,4 +1212,51 @@ public void testEliminateRangeByNullEqualsOutsideInterval() { Expression exp = rule.rule(new And(EMPTY, eq1, r)); assertEquals(Literal.FALSE, rule.rule(exp)); } + + public void testTranslateMinToFirst() { + Min min1 = new Min(EMPTY, new FieldAttribute(EMPTY, "str", new EsField("str", DataType.KEYWORD, emptyMap(), true))); + Min min2 = new Min(EMPTY, getFieldAttribute()); + + OrderBy plan = new OrderBy(EMPTY, new Aggregate(EMPTY, FROM(), emptyList(), Arrays.asList(min1, min2)), + Arrays.asList( + new Order(EMPTY, min1, OrderDirection.ASC, Order.NullsPosition.LAST), + new Order(EMPTY, min2, OrderDirection.ASC, Order.NullsPosition.LAST))); + LogicalPlan result = new ReplaceMinMaxWithTopHits().apply(plan); + assertTrue(result instanceof OrderBy); + List order = ((OrderBy) result).order(); + assertEquals(2, order.size()); + assertEquals(First.class, order.get(0).child().getClass()); + assertEquals(min2, order.get(1).child());; + First first = (First) order.get(0).child(); + + assertTrue(((OrderBy) result).child() instanceof Aggregate); + List aggregates = ((Aggregate) ((OrderBy) result).child()).aggregates(); + assertEquals(2, aggregates.size()); + assertEquals(First.class, aggregates.get(0).getClass()); + assertSame(first, aggregates.get(0)); + assertEquals(min2, aggregates.get(1)); + } + + public void testTranslateMaxToLast() { + Max max1 = new Max(EMPTY, new FieldAttribute(EMPTY, "str", new EsField("str", DataType.KEYWORD, emptyMap(), true))); + Max max2 = new Max(EMPTY, getFieldAttribute()); + + OrderBy plan = new OrderBy(EMPTY, new Aggregate(EMPTY, FROM(), emptyList(), Arrays.asList(max1, max2)), + Arrays.asList( + new Order(EMPTY, max1, OrderDirection.ASC, Order.NullsPosition.LAST), + new Order(EMPTY, max2, OrderDirection.ASC, Order.NullsPosition.LAST))); + LogicalPlan result = new ReplaceMinMaxWithTopHits().apply(plan); + assertTrue(result instanceof OrderBy); + List order = ((OrderBy) result).order(); + assertEquals(Last.class, order.get(0).child().getClass()); + assertEquals(max2, order.get(1).child());; + Last last = (Last) order.get(0).child(); + + assertTrue(((OrderBy) result).child() instanceof Aggregate); + List aggregates = ((Aggregate) ((OrderBy) result).child()).aggregates(); + assertEquals(2, aggregates.size()); + assertEquals(Last.class, aggregates.get(0).getClass()); + assertSame(last, aggregates.get(0)); + assertEquals(max2, aggregates.get(1)); + } } diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java index 704e4d7147e78..2cad625889474 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java @@ -617,4 +617,61 @@ public void testAllCountVariantsWithHavingGenerateCorrectAggregations() { + "\"lang\":\"painless\",\"params\":{\"v0\":3,\"v1\":32,\"v2\":1,\"v3\":2,\"v4\":5,\"v5\":50000}}," + "\"gap_policy\":\"skip\"}}}}}")); } + + public void testTopHitsAggregationWithOneArg() { + { + PhysicalPlan p = optimizeAndPlan("SELECT FIRST(keyword) FROM test"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertEquals(1, eqe.output().size()); + assertEquals("FIRST(keyword)", eqe.output().get(0).qualifiedName()); + assertTrue(eqe.output().get(0).dataType() == DataType.KEYWORD); + assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""), + endsWith("\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false," + + "\"explain\":false,\"docvalue_fields\":[{\"field\":\"keyword\"}]," + + "\"sort\":[{\"keyword\":{\"order\":\"asc\",\"missing\":\"_last\",\"unmapped_type\":\"keyword\"}}]}}}}}")); + } + { + PhysicalPlan p = optimizeAndPlan("SELECT LAST(keyword) FROM test"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertEquals(1, eqe.output().size()); + assertEquals("LAST(keyword)", eqe.output().get(0).qualifiedName()); + assertTrue(eqe.output().get(0).dataType() == DataType.KEYWORD); + assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""), + endsWith("\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false," + + "\"explain\":false,\"docvalue_fields\":[{\"field\":\"keyword\"}]," + + "\"sort\":[{\"keyword\":{\"order\":\"desc\",\"missing\":\"_last\",\"unmapped_type\":\"keyword\"}}]}}}}}")); + } + } + + public void testTopHitsAggregationWithTwoArgs() { + { + PhysicalPlan p = optimizeAndPlan("SELECT FIRST(keyword, int) FROM test"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertEquals(1, eqe.output().size()); + assertEquals("FIRST(keyword, int)", eqe.output().get(0).qualifiedName()); + assertTrue(eqe.output().get(0).dataType() == DataType.KEYWORD); + assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""), + endsWith("\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false," + + "\"explain\":false,\"docvalue_fields\":[{\"field\":\"keyword\"}]," + + "\"sort\":[{\"int\":{\"order\":\"asc\",\"missing\":\"_last\",\"unmapped_type\":\"integer\"}}," + + "{\"keyword\":{\"order\":\"asc\",\"missing\":\"_last\",\"unmapped_type\":\"keyword\"}}]}}}}}")); + + } + { + PhysicalPlan p = optimizeAndPlan("SELECT LAST(keyword, int) FROM test"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertEquals(1, eqe.output().size()); + assertEquals("LAST(keyword, int)", eqe.output().get(0).qualifiedName()); + assertTrue(eqe.output().get(0).dataType() == DataType.KEYWORD); + assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""), + endsWith("\"top_hits\":{\"from\":0,\"size\":1,\"version\":false,\"seq_no_primary_term\":false," + + "\"explain\":false,\"docvalue_fields\":[{\"field\":\"keyword\"}]," + + "\"sort\":[{\"int\":{\"order\":\"desc\",\"missing\":\"_last\",\"unmapped_type\":\"integer\"}}," + + "{\"keyword\":{\"order\":\"desc\",\"missing\":\"_last\",\"unmapped_type\":\"keyword\"}}]}}}}}")); + } + } } diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/type/TypesTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/type/TypesTests.java index fd7b88330d3c3..a09a28ced7d5a 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/type/TypesTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/type/TypesTests.java @@ -10,7 +10,6 @@ import org.elasticsearch.test.ESTestCase; import java.io.InputStream; -import java.util.List; import java.util.Map; import static java.util.Collections.emptyMap; @@ -20,7 +19,6 @@ import static org.elasticsearch.xpack.sql.type.DataType.NESTED; import static org.elasticsearch.xpack.sql.type.DataType.OBJECT; import static org.elasticsearch.xpack.sql.type.DataType.TEXT; -import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; @@ -84,10 +82,6 @@ public void testDateField() { assertThat(field.getDataType(), is(DATETIME)); assertThat(field.isAggregatable(), is(true)); assertThat(field.getPrecision(), is(24)); - - DateEsField dfield = (DateEsField) field; - List formats = dfield.getFormats(); - assertThat(formats, hasSize(3)); } public void testDateNoFormat() { @@ -98,8 +92,6 @@ public void testDateNoFormat() { assertThat(field.getDataType(), is(DATETIME)); assertThat(field.isAggregatable(), is(true)); DateEsField dfield = (DateEsField) field; - // default types - assertThat(dfield.getFormats(), hasSize(2)); } public void testDateMulti() { @@ -110,8 +102,6 @@ public void testDateMulti() { assertThat(field.getDataType(), is(DATETIME)); assertThat(field.isAggregatable(), is(true)); DateEsField dfield = (DateEsField) field; - // default types - assertThat(dfield.getFormats(), hasSize(1)); } public void testDocValueField() {