From 91dca8dab9f3478447de3b47a5a243a61bd68ee9 Mon Sep 17 00:00:00 2001 From: Fang Xing <155562079+fang-xing-esql@users.noreply.github.com> Date: Wed, 2 Oct 2024 16:27:03 -0400 Subject: [PATCH] [ES|QL] Validate index name in parser (#112081) * validate index name in parser --- docs/changelog/112081.yaml | 5 + .../xpack/esql/core/util/StringUtils.java | 1 + .../xpack/esql/parser/IdentifierBuilder.java | 50 ++++ .../parser/AbstractStatementParserTests.java | 13 + .../esql/parser/StatementParserTests.java | 227 +++++++++++++++++- 5 files changed, 284 insertions(+), 12 deletions(-) create mode 100644 docs/changelog/112081.yaml diff --git a/docs/changelog/112081.yaml b/docs/changelog/112081.yaml new file mode 100644 index 0000000000000..a4009e01fca71 --- /dev/null +++ b/docs/changelog/112081.yaml @@ -0,0 +1,5 @@ +pr: 112081 +summary: "[ES|QL] Validate index name in parser" +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java index 1bfd94730c4fc..288612c9a593d 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/util/StringUtils.java @@ -39,6 +39,7 @@ private StringUtils() {} public static final String NEW_LINE = "\n"; public static final String SQL_WILDCARD = "%"; public static final String WILDCARD = "*"; + public static final String EXCLUSION = "-"; private static final String[] INTEGER_ORDINALS = new String[] { "th", "st", "nd", "rd", "th", "th", "th", "th", "th", "th" }; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/IdentifierBuilder.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/IdentifierBuilder.java index 9ccbb00ea4b5b..51c2eef3d75c3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/IdentifierBuilder.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/IdentifierBuilder.java @@ -8,7 +8,12 @@ package org.elasticsearch.xpack.esql.parser; import org.antlr.v4.runtime.tree.TerminalNode; +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.metadata.MetadataCreateIndexService; import org.elasticsearch.common.Strings; +import org.elasticsearch.indices.InvalidIndexNameException; +import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.parser.EsqlBaseParser.IdentifierContext; import org.elasticsearch.xpack.esql.parser.EsqlBaseParser.IndexStringContext; @@ -16,6 +21,9 @@ import java.util.List; import static org.elasticsearch.transport.RemoteClusterAware.REMOTE_CLUSTER_INDEX_SEPARATOR; +import static org.elasticsearch.xpack.esql.core.util.StringUtils.EXCLUSION; +import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD; +import static org.elasticsearch.xpack.esql.parser.ParserUtils.source; abstract class IdentifierBuilder extends AbstractBuilder { @@ -46,12 +54,54 @@ public String visitIndexString(IndexStringContext ctx) { public String visitIndexPattern(List ctx) { List patterns = new ArrayList<>(ctx.size()); + Holder hasSeenStar = new Holder<>(false); ctx.forEach(c -> { String indexPattern = visitIndexString(c.indexString()); + hasSeenStar.set(indexPattern.contains(WILDCARD) || hasSeenStar.get()); + validateIndexPattern(indexPattern, c, hasSeenStar.get()); patterns.add( c.clusterString() != null ? c.clusterString().getText() + REMOTE_CLUSTER_INDEX_SEPARATOR + indexPattern : indexPattern ); }); return Strings.collectionToDelimitedString(patterns, ","); } + + private static void validateIndexPattern(String indexPattern, EsqlBaseParser.IndexPatternContext ctx, boolean hasSeenStar) { + // multiple index names can be in the same double quote, e.g. indexPattern = "idx1, *, -idx2" + String[] indices = indexPattern.split(","); + boolean hasExclusion = false; + for (String index : indices) { + hasSeenStar = index.contains(WILDCARD) || hasSeenStar; + index = index.replace(WILDCARD, "").strip(); + if (index.isBlank()) { + continue; + } + hasExclusion = index.startsWith(EXCLUSION); + index = removeExclusion(index); + String tempName; + try { + // remove the exclusion outside of <>, from index names with DateMath expression, + // e.g. -<-logstash-{now/d}> becomes <-logstash-{now/d}> before calling resolveDateMathExpression + tempName = IndexNameExpressionResolver.resolveDateMathExpression(index); + } catch (ElasticsearchParseException e) { + // throws exception if the DateMath expression is invalid, resolveDateMathExpression does not complain about exclusions + throw new ParsingException(e, source(ctx), e.getMessage()); + } + hasExclusion = tempName.startsWith(EXCLUSION) || hasExclusion; + index = tempName.equals(index) ? index : removeExclusion(tempName); + try { + MetadataCreateIndexService.validateIndexOrAliasName(index, InvalidIndexNameException::new); + } catch (InvalidIndexNameException e) { + // ignore invalid index name if it has exclusions and there is an index with wildcard before it + if (hasSeenStar && hasExclusion) { + continue; + } + throw new ParsingException(e, source(ctx), e.getMessage()); + } + } + } + + private static String removeExclusion(String indexPattern) { + return indexPattern.charAt(0) == EXCLUSION.charAt(0) ? indexPattern.substring(1) : indexPattern; + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java index 697dfcf0a8e6b..e3574576e478f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/AbstractStatementParserTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.parser; +import org.elasticsearch.common.logging.LoggerMessageFormat; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.core.expression.Literal; @@ -38,6 +39,10 @@ void assertStatement(String statement, LogicalPlan expected) { assertThat(statement, actual, equalTo(expected)); } + LogicalPlan statement(String query, String arg) { + return statement(LoggerMessageFormat.format(null, query, arg), new QueryParams()); + } + LogicalPlan statement(String e) { return statement(e, new QueryParams()); } @@ -124,4 +129,12 @@ void expectError(String query, List params, String errorMessage) { ); assertThat(e.getMessage(), containsString(errorMessage)); } + + void expectInvalidIndexNameErrorWithLineNumber(String query, String arg, String lineNumber, String indexString) { + expectError(LoggerMessageFormat.format(null, query, arg), lineNumber + "Invalid index name [" + indexString); + } + + void expectDateMathErrorWithLineNumber(String query, String arg, String lineNumber, String error) { + expectError(LoggerMessageFormat.format(null, query, arg), lineNumber + error); + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java index c5a5bfac023c1..adf31ca983067 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java @@ -50,6 +50,7 @@ import org.elasticsearch.xpack.esql.plan.logical.Row; import org.elasticsearch.xpack.esql.plan.logical.UnresolvedRelation; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.function.Function; @@ -377,24 +378,23 @@ public void testStringAsIndexPattern() { ",", command + " , \"\"" ); - + assertStringAsIndexPattern( + "-,-<-logstash-{now/M{yyyy.MM}}>," + + "-,-<-logstash-{now/d{yyyy.MM.dd|+12:00}}>", + command + + " -, -<-logstash-{now/M{yyyy.MM}}>, " + + "\"-\", \"-<-logstash-{now/d{yyyy.MM.dd|+12:00}}>\"" + ); assertStringAsIndexPattern("foo,test,xyz", command + " \"\"\"foo\"\"\", test,\"xyz\""); - assertStringAsIndexPattern("`backtick`,``multiple`back``ticks```", command + " `backtick`, ``multiple`back``ticks```"); - assertStringAsIndexPattern("test,metadata,metaata,.metadata", command + " test,\"metadata\", metaata, .metadata"); - assertStringAsIndexPattern(".dot", command + " .dot"); - assertStringAsIndexPattern("cluster:index", command + " cluster:index"); - assertStringAsIndexPattern("cluster:index|pattern", command + " cluster:\"index|pattern\""); assertStringAsIndexPattern("cluster:.index", command + " cluster:.index"); assertStringAsIndexPattern("cluster*:index*", command + " cluster*:index*"); assertStringAsIndexPattern("cluster*:*", command + " cluster*:*"); assertStringAsIndexPattern("*:index*", command + " *:index*"); - assertStringAsIndexPattern("*:index|pattern", command + " *:\"index|pattern\""); assertStringAsIndexPattern("*:*", command + " *:*"); - assertStringAsIndexPattern("*:*,cluster*:index|pattern,i|p", command + " *:*, cluster*:\"index|pattern\", \"i|p\""); } } @@ -416,19 +416,222 @@ public void testStringAsLookupIndexPattern() { ); assertStringAsLookupIndexPattern("foo", "ROW x = 1 | LOOKUP \"\"\"foo\"\"\" ON j"); - assertStringAsLookupIndexPattern("`backtick`", "ROW x = 1 | LOOKUP `backtick` ON j"); assertStringAsLookupIndexPattern("``multiple`back``ticks```", "ROW x = 1 | LOOKUP ``multiple`back``ticks``` ON j"); - assertStringAsLookupIndexPattern(".dot", "ROW x = 1 | LOOKUP .dot ON j"); - assertStringAsLookupIndexPattern("cluster:index", "ROW x = 1 | LOOKUP cluster:index ON j"); assertStringAsLookupIndexPattern("cluster:.index", "ROW x = 1 | LOOKUP cluster:.index ON j"); assertStringAsLookupIndexPattern("cluster*:index*", "ROW x = 1 | LOOKUP cluster*:index* ON j"); assertStringAsLookupIndexPattern("cluster*:*", "ROW x = 1 | LOOKUP cluster*:* ON j"); assertStringAsLookupIndexPattern("*:index*", "ROW x = 1 | LOOKUP *:index* ON j"); assertStringAsLookupIndexPattern("*:*", "ROW x = 1 | LOOKUP *:* ON j"); + } + public void testInvalidCharacterInIndexPattern() { + Map commands = new HashMap<>(); + commands.put("FROM {}", "line 1:8: "); + if (Build.current().isSnapshot()) { + commands.put("METRICS {}", "line 1:11: "); + commands.put("ROW x = 1 | LOOKUP {} ON j", "line 1:22: "); + } + List clusterStrings = List.of(" ", " *:", " cluster:"); + String lineNumber; + for (String command : commands.keySet()) { + lineNumber = commands.get(command); + for (String clusterString : clusterStrings) { + expectInvalidIndexNameErrorWithLineNumber(command, clusterString + "\"index|pattern\"", lineNumber, "index|pattern"); + expectInvalidIndexNameErrorWithLineNumber(command, clusterString + "\"index pattern\"", lineNumber, "index pattern"); + expectInvalidIndexNameErrorWithLineNumber(command, clusterString + "\"index#pattern\"", lineNumber, "index#pattern"); + expectInvalidIndexNameErrorWithLineNumber(command, clusterString + "index#pattern", lineNumber, "index#pattern"); + expectInvalidIndexNameErrorWithLineNumber(command, clusterString + "\"index?pattern\"", lineNumber, "index?pattern"); + expectInvalidIndexNameErrorWithLineNumber(command, clusterString + "index?pattern", lineNumber, "index?pattern"); + expectInvalidIndexNameErrorWithLineNumber(command, clusterString + "\"index>pattern\"", lineNumber, "index>pattern"); + expectInvalidIndexNameErrorWithLineNumber(command, clusterString + "index>pattern", lineNumber, "index>pattern"); + expectInvalidIndexNameErrorWithLineNumber(command, clusterString + "\"index\"", + lineNumber, + "-logstash-" + ); + expectInvalidIndexNameErrorWithLineNumber( + command, + clusterString + "--", + lineNumber, + "-" + ); + expectInvalidIndexNameErrorWithLineNumber(command, clusterString + "\"\"", lineNumber, "logstash#"); + expectInvalidIndexNameErrorWithLineNumber(command, clusterString + "", lineNumber, "logstash#"); + expectInvalidIndexNameErrorWithLineNumber( + command, + clusterString + "\"+\"", + lineNumber, + "+" + ); + expectInvalidIndexNameErrorWithLineNumber( + command, + clusterString + "+", + lineNumber, + "+" + ); + expectInvalidIndexNameErrorWithLineNumber( + command, + clusterString + "\"_\"", + lineNumber, + "_" + ); + expectInvalidIndexNameErrorWithLineNumber( + command, + clusterString + "_", + lineNumber, + "_" + ); + expectInvalidIndexNameErrorWithLineNumber(command, clusterString + "\"<>\"", lineNumber, ">", lineNumber, ">>\"", lineNumber, ">>", lineNumber, "\"", + lineNumber, + "logstash- " + ); + } + } + + // comma separated indices + // Invalid index names after removing exclusion fail, when there is no index name with wildcard before it + for (String command : commands.keySet()) { + if (command.contains("LOOKUP")) { + continue; + } + for (String clusterString : clusterStrings) { + lineNumber = command.contains("FROM") + ? "line 1:" + (22 + clusterString.length() - 1) + ": " + : "line 1:" + (25 + clusterString.length() - 1) + ": "; + expectInvalidIndexNameErrorWithLineNumber( + command, + clusterString + "indexpattern, --indexpattern", + lineNumber, + "-indexpattern" + ); + expectInvalidIndexNameErrorWithLineNumber( + command, + clusterString + "indexpattern, \"--indexpattern\"", + lineNumber, + "-indexpattern" + ); + expectInvalidIndexNameErrorWithLineNumber( + command, + clusterString + "\"indexpattern, --indexpattern\"", + commands.get(command), + "-indexpattern" + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "indexpattern,-indexpattern"), + statement(command, clusterString + "indexpattern, -indexpattern") + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "indexpattern,-indexpattern"), + statement(command, clusterString + "indexpattern, \"-indexpattern\"") + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "indexpattern, -indexpattern"), + statement(command, clusterString + "\"indexpattern, -indexpattern\"") + ); + } + } + + // Invalid index names, except invalid DateMath, are ignored if there is an index name with wildcard before it + for (String command : commands.keySet()) { + if (command.contains("LOOKUP")) { + continue; + } + for (String clusterString : clusterStrings) { + lineNumber = command.contains("FROM") + ? "line 1:" + (11 + clusterString.length() - 1) + ": " + : "line 1:" + (14 + clusterString.length() - 1) + ": "; + assertEquals( + unresolvedRelation(clusterString.strip() + "*,-index#pattern"), + statement(command, clusterString + "*, \"-index#pattern\"") + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "*,-index#pattern"), + statement(command, clusterString + "*, -index#pattern") + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "*, -index#pattern"), + statement(command, clusterString + "\"*, -index#pattern\"") + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "index*,-index#pattern"), + statement(command, clusterString + "index*, \"-index#pattern\"") + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "index*,-index#pattern"), + statement(command, clusterString + "index*, -index#pattern") + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "index*, -index#pattern"), + statement(command, clusterString + "\"index*, -index#pattern\"") + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "*,-<--logstash-{now/M{yyyy.MM}}>"), + statement(command, clusterString + "*, \"-<--logstash-{now/M{yyyy.MM}}>\"") + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "*,-<--logstash-{now/M{yyyy.MM}}>"), + statement(command, clusterString + "*, -<--logstash-{now/M{yyyy.MM}}>") + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "*, -<--logstash-{now/M{yyyy.MM}}>"), + statement(command, clusterString + "\"*, -<--logstash-{now/M{yyyy.MM}}>\"") + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "index*,-<--logstash#-{now/M{yyyy.MM}}>"), + statement(command, clusterString + "index*, \"-<--logstash#-{now/M{yyyy.MM}}>\"") + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "index*,-<--logstash#-{now/M{yyyy.MM}}>"), + statement(command, clusterString + "index*, -<--logstash#-{now/M{yyyy.MM}}>") + ); + assertEquals( + unresolvedRelation(clusterString.strip() + "index*, -<--logstash#-{now/M{yyyy.MM}}>"), + statement(command, clusterString + "\"index*, -<--logstash#-{now/M{yyyy.MM}}>\"") + ); + expectDateMathErrorWithLineNumber( + command, + clusterString + "*, \"-<-logstash-{now/D}>\"", + lineNumber, + "unit [D] not supported for date math [/D]" + ); + expectDateMathErrorWithLineNumber( + command, + clusterString + "*, -<-logstash-{now/D}>", + lineNumber, + "unit [D] not supported for date math [/D]" + ); + expectDateMathErrorWithLineNumber( + command, + clusterString + "\"*, -<-logstash-{now/D}>\"", + commands.get(command), + "unit [D] not supported for date math [/D]" + ); + } + } } public void testInvalidQuotingAsFromIndexPattern() { @@ -1569,7 +1772,7 @@ public void testMetricsIdentifiers() { Map.entry("metrics foo,test-*", "foo,test-*"), Map.entry("metrics 123-test@foo_bar+baz1", "123-test@foo_bar+baz1"), Map.entry("metrics foo, test,xyz", "foo,test,xyz"), - Map.entry("metrics >", ">") + Map.entry("metrics ", "") ); for (Map.Entry e : patterns.entrySet()) { assertStatement(e.getKey(), unresolvedRelation(e.getValue()));