diff --git a/x-pack/plugin/esql/build.gradle b/x-pack/plugin/esql/build.gradle index 26cf53b334b1e..0225664918b7b 100644 --- a/x-pack/plugin/esql/build.gradle +++ b/x-pack/plugin/esql/build.gradle @@ -11,7 +11,7 @@ esplugin { name 'x-pack-esql' description 'The plugin that powers ESQL for Elasticsearch' classname 'org.elasticsearch.xpack.esql.plugin.EsqlPlugin' - extendedPlugins = ['x-pack-esql-core', 'lang-painless'] + extendedPlugins = ['x-pack-esql-core', 'lang-painless', 'x-pack-ml'] } base { @@ -22,6 +22,7 @@ dependencies { compileOnly project(path: xpackModule('core')) compileOnly project(':modules:lang-painless:spi') compileOnly project(xpackModule('esql-core')) + compileOnly project(xpackModule('ml')) implementation project('compute') implementation project('compute:ann') implementation project(':libs:elasticsearch-dissect') diff --git a/x-pack/plugin/esql/compute/build.gradle b/x-pack/plugin/esql/compute/build.gradle index 971bfd39c231f..81d1a6f5360ca 100644 --- a/x-pack/plugin/esql/compute/build.gradle +++ b/x-pack/plugin/esql/compute/build.gradle @@ -11,11 +11,14 @@ base { dependencies { compileOnly project(':server') compileOnly project('ann') + compileOnly project(xpackModule('ml')) annotationProcessor project('gen') implementation 'com.carrotsearch:hppc:0.8.1' testImplementation project(':test:framework') testImplementation(project(xpackModule('esql-core'))) + testImplementation(project(xpackModule('core'))) + testImplementation(project(xpackModule('ml'))) } def projectDirectory = project.layout.projectDirectory diff --git a/x-pack/plugin/esql/compute/src/main/java/module-info.java b/x-pack/plugin/esql/compute/src/main/java/module-info.java index dc8cda0fbe3c8..1739c90467c2c 100644 --- a/x-pack/plugin/esql/compute/src/main/java/module-info.java +++ b/x-pack/plugin/esql/compute/src/main/java/module-info.java @@ -7,6 +7,7 @@ module org.elasticsearch.compute { + requires org.apache.lucene.analysis.common; requires org.apache.lucene.core; requires org.elasticsearch.base; requires org.elasticsearch.server; @@ -15,6 +16,7 @@ // required due to dependency on org.elasticsearch.common.util.concurrent.AbstractAsyncTask requires org.apache.logging.log4j; requires org.elasticsearch.logging; + requires org.elasticsearch.ml; requires org.elasticsearch.tdigest; requires org.elasticsearch.geo; requires hppc; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec new file mode 100644 index 0000000000000..076f3ee092ecf --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec @@ -0,0 +1,13 @@ +categorize +required_capability: categorize + +FROM sample_data + | STATS count=COUNT(), values=VALUES(message) BY category=CATEGORIZE(message) + | SORT count DESC, category ASC +; + +count:long | values:keyword | category:integer +3 | [Connected to 10.1.0.1, Connected to 10.1.0.2, Connected to 10.1.0.3] | 0 +3 | [Connection error] | 1 +1 | [Disconnected] | 2 +; diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeEvaluator.java new file mode 100644 index 0000000000000..93bc8ce3e2a1b --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeEvaluator.java @@ -0,0 +1,131 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.grouping; + +import java.lang.IllegalArgumentException; +import java.lang.Override; +import java.lang.String; +import java.util.function.Function; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.Warnings; +import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategorizer; +import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Categorize}. + * This class is generated. Do not edit it. + */ +public final class CategorizeEvaluator implements EvalOperator.ExpressionEvaluator { + private final Warnings warnings; + + private final EvalOperator.ExpressionEvaluator v; + + private final CategorizationAnalyzer analyzer; + + private final TokenListCategorizer.CloseableTokenListCategorizer categorizer; + + private final DriverContext driverContext; + + public CategorizeEvaluator(Source source, EvalOperator.ExpressionEvaluator v, + CategorizationAnalyzer analyzer, + TokenListCategorizer.CloseableTokenListCategorizer categorizer, DriverContext driverContext) { + this.v = v; + this.analyzer = analyzer; + this.categorizer = categorizer; + this.driverContext = driverContext; + this.warnings = Warnings.createWarnings(driverContext.warningsMode(), source); + } + + @Override + public Block eval(Page page) { + try (BytesRefBlock vBlock = (BytesRefBlock) v.eval(page)) { + BytesRefVector vVector = vBlock.asVector(); + if (vVector == null) { + return eval(page.getPositionCount(), vBlock); + } + return eval(page.getPositionCount(), vVector).asBlock(); + } + } + + public IntBlock eval(int positionCount, BytesRefBlock vBlock) { + try(IntBlock.Builder result = driverContext.blockFactory().newIntBlockBuilder(positionCount)) { + BytesRef vScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + if (vBlock.isNull(p)) { + result.appendNull(); + continue position; + } + if (vBlock.getValueCount(p) != 1) { + if (vBlock.getValueCount(p) > 1) { + warnings.registerException(new IllegalArgumentException("single-value function encountered multi-value")); + } + result.appendNull(); + continue position; + } + result.appendInt(Categorize.process(vBlock.getBytesRef(vBlock.getFirstValueIndex(p), vScratch), this.analyzer, this.categorizer)); + } + return result.build(); + } + } + + public IntVector eval(int positionCount, BytesRefVector vVector) { + try(IntVector.FixedBuilder result = driverContext.blockFactory().newIntVectorFixedBuilder(positionCount)) { + BytesRef vScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + result.appendInt(p, Categorize.process(vVector.getBytesRef(p, vScratch), this.analyzer, this.categorizer)); + } + return result.build(); + } + } + + @Override + public String toString() { + return "CategorizeEvaluator[" + "v=" + v + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(v, analyzer, categorizer); + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory v; + + private final Function analyzer; + + private final Function categorizer; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory v, + Function analyzer, + Function categorizer) { + this.source = source; + this.v = v; + this.analyzer = analyzer; + this.categorizer = categorizer; + } + + @Override + public CategorizeEvaluator get(DriverContext context) { + return new CategorizeEvaluator(source, v.get(context), analyzer.apply(context), categorizer.apply(context), context); + } + + @Override + public String toString() { + return "CategorizeEvaluator[" + "v=" + v + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 49076a1d65e72..ce8c20e4fbf11 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -33,6 +33,7 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.Values; import org.elasticsearch.xpack.esql.expression.function.aggregate.WeightedAvg; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; +import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Least; @@ -383,7 +384,10 @@ private FunctionDefinition[][] functions() { } private static FunctionDefinition[][] snapshotFunctions() { - return new FunctionDefinition[][] { new FunctionDefinition[] { def(Rate.class, Rate::withUnresolvedTimestamp, "rate") } }; + return new FunctionDefinition[][] { + new FunctionDefinition[] { + def(Categorize.class, Categorize::new, "categorize"), + def(Rate.class, Rate::withUnresolvedTimestamp, "rate") } }; } public EsqlFunctionRegistry snapshotRegistry() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java new file mode 100644 index 0000000000000..82c836a6f9d49 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java @@ -0,0 +1,159 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.grouping; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.util.BytesRefHash; +import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.compute.ann.Fixed; +import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; +import org.elasticsearch.index.analysis.CharFilterFactory; +import org.elasticsearch.index.analysis.CustomAnalyzer; +import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; +import org.elasticsearch.xpack.esql.capabilities.Validatable; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationBytesRefHash; +import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationPartOfSpeechDictionary; +import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategorizer; +import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer; + +import java.io.IOException; +import java.util.List; +import java.util.function.Function; + +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; + +/** + * Categorizes text messages. + * + * This implementation is incomplete and comes with the following caveats: + * - it only works correctly on a single node. + * - when running on multiple nodes, category IDs of the different nodes are + * aggregated, even though the same ID can correspond to a totally different + * category + * - the output consists of category IDs, which should be replaced by category + * regexes or keys + * + * TODO(jan, nik): fix this + */ +public class Categorize extends GroupingFunction implements Validatable { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Expression.class, + "Categorize", + Categorize::new + ); + + private final Expression field; + + @FunctionInfo(returnType = { "integer" }, description = "Categorizes text messages") + public Categorize( + Source source, + @Param(name = "field", type = { "text", "keyword" }, description = "Expression to categorize") Expression field + ) { + super(source, List.of(field)); + this.field = field; + } + + private Categorize(StreamInput in) throws IOException { + this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeNamedWriteable(field); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + public boolean foldable() { + return field.foldable(); + } + + @Evaluator + static int process( + BytesRef v, + @Fixed(includeInToString = false, build = true) CategorizationAnalyzer analyzer, + @Fixed(includeInToString = false, build = true) TokenListCategorizer.CloseableTokenListCategorizer categorizer + ) { + String s = v.utf8ToString(); + try (TokenStream ts = analyzer.tokenStream("text", s)) { + return categorizer.computeCategory(ts, s.length(), 1).getId(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public ExpressionEvaluator.Factory toEvaluator(Function toEvaluator) { + return new CategorizeEvaluator.Factory( + source(), + toEvaluator.apply(field), + context -> new CategorizationAnalyzer( + // TODO(jan): get the correct analyzer in here, see CategorizationAnalyzerConfig::buildStandardCategorizationAnalyzer + new CustomAnalyzer( + TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new), + new CharFilterFactory[0], + new TokenFilterFactory[0] + ), + true + ), + context -> new TokenListCategorizer.CloseableTokenListCategorizer( + new CategorizationBytesRefHash(new BytesRefHash(2048, context.bigArrays())), + CategorizationPartOfSpeechDictionary.getInstance(), + 0.70f + ) + ); + } + + @Override + protected TypeResolution resolveType() { + return isString(field(), sourceText(), DEFAULT); + } + + @Override + public DataType dataType() { + return DataType.INTEGER; + } + + @Override + public Expression replaceChildren(List newChildren) { + return new Categorize(source(), newChildren.get(0)); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, Categorize::new, field); + } + + public Expression field() { + return field; + } + + @Override + public String toString() { + return "Categorize{field=" + field + "}"; + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/EsqlScalarFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/EsqlScalarFunction.java index 563847473c992..14b0c872a3b86 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/EsqlScalarFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/EsqlScalarFunction.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; +import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Case; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Greatest; import org.elasticsearch.xpack.esql.expression.function.scalar.conditional.Least; @@ -73,6 +74,7 @@ public static List getNamedWriteables() { entries.add(Atan2.ENTRY); entries.add(Bucket.ENTRY); entries.add(Case.ENTRY); + entries.add(Categorize.ENTRY); entries.add(CIDRMatch.ENTRY); entries.add(Coalesce.ENTRY); entries.add(Concat.ENTRY); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlFeatures.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlFeatures.java index 4b6a38a3e8762..f19e6523aa075 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlFeatures.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlFeatures.java @@ -179,9 +179,14 @@ public class EsqlFeatures implements FeatureSpecification { */ public static final NodeFeature RESOLVE_FIELDS_API = new NodeFeature("esql.resolve_fields_api"); + /** + * Support categorize + */ + public static final NodeFeature CATEGORIZE = new NodeFeature("esql.categorize"); + private Set snapshotBuildFeatures() { assert Build.current().isSnapshot() : Build.current(); - return Set.of(METRICS_SYNTAX); + return Set.of(METRICS_SYNTAX, CATEGORIZE); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java new file mode 100644 index 0000000000000..f93389d5cb659 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.grouping; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; + +public class CategorizeTests extends AbstractScalarFunctionTestCase { + public CategorizeTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + List suppliers = new ArrayList<>(); + for (DataType dataType : List.of(DataType.KEYWORD, DataType.TEXT)) { + suppliers.add( + new TestCaseSupplier( + "text with " + dataType.typeName(), + List.of(dataType), + () -> new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(new BytesRef("blah blah blah"), dataType, "f")), + "CategorizeEvaluator[v=Attribute[channel=0]]", + DataType.INTEGER, + equalTo(0) + ) + ) + ); + } + return parameterSuppliersFromTypedDataWithDefaultChecks(true, suppliers, (v, p) -> "string"); + } + + @Override + protected Expression build(Source source, List args) { + return new Categorize(source, args.get(0)); + } +} diff --git a/x-pack/plugin/ml/src/main/java/module-info.java b/x-pack/plugin/ml/src/main/java/module-info.java index 0f3fdd836feca..4984fa8912e28 100644 --- a/x-pack/plugin/ml/src/main/java/module-info.java +++ b/x-pack/plugin/ml/src/main/java/module-info.java @@ -37,6 +37,8 @@ exports org.elasticsearch.xpack.ml; exports org.elasticsearch.xpack.ml.action; + exports org.elasticsearch.xpack.ml.aggs.categorization; exports org.elasticsearch.xpack.ml.autoscaling; + exports org.elasticsearch.xpack.ml.job.categorization; exports org.elasticsearch.xpack.ml.notifications; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizationBytesRefHash.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizationBytesRefHash.java index 58feb24480f87..7d5f1d5517de0 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizationBytesRefHash.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizationBytesRefHash.java @@ -12,11 +12,11 @@ import org.elasticsearch.common.util.BytesRefHash; import org.elasticsearch.core.Releasable; -class CategorizationBytesRefHash implements Releasable { +public class CategorizationBytesRefHash implements Releasable { private final BytesRefHash bytesRefHash; - CategorizationBytesRefHash(BytesRefHash bytesRefHash) { + public CategorizationBytesRefHash(BytesRefHash bytesRefHash) { this.bytesRefHash = bytesRefHash; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java index cedaced0f57ee..e55736cf43607 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java @@ -94,11 +94,11 @@ protected CategorizeTextAggregator( true ); } - this.categorizers = bigArrays().newObjectArray(1); + this.categorizers = context.bigArrays().newObjectArray(1); this.similarityThreshold = similarityThreshold; - this.bucketOrds = LongKeyedBucketOrds.build(bigArrays(), CardinalityUpperBound.MANY); + this.bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), CardinalityUpperBound.MANY); this.bucketCountThresholds = bucketCountThresholds; - this.bytesRefHash = new CategorizationBytesRefHash(new BytesRefHash(2048, bigArrays())); + this.bytesRefHash = new CategorizationBytesRefHash(new BytesRefHash(2048, context.bigArrays())); // TODO: make it possible to choose a language instead of or as well as English for the part-of-speech dictionary this.partOfSpeechDictionary = CategorizationPartOfSpeechDictionary.getInstance(); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/TokenListCategorizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/TokenListCategorizer.java index e1f2404ee56b5..d0088edcb0805 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/TokenListCategorizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/TokenListCategorizer.java @@ -14,6 +14,8 @@ import org.apache.lucene.util.Accountable; import org.apache.lucene.util.BytesRef; import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.Releasables; import org.elasticsearch.search.aggregations.AggregationReduceContext; import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategory.TokenAndWeight; @@ -40,6 +42,25 @@ */ public class TokenListCategorizer implements Accountable { + /** + * TokenListCategorizer that takes ownership of the CategorizationBytesRefHash and releases it when closed. + */ + public static class CloseableTokenListCategorizer extends TokenListCategorizer implements Releasable { + + public CloseableTokenListCategorizer( + CategorizationBytesRefHash bytesRefHash, + CategorizationPartOfSpeechDictionary partOfSpeechDictionary, + float threshold + ) { + super(bytesRefHash, partOfSpeechDictionary, threshold); + } + + @Override + public void close() { + Releasables.close(super.bytesRefHash); + } + } + public static final int MAX_TOKENS = 100; private static final long SHALLOW_SIZE = shallowSizeOfInstance(TokenListCategorizer.class); private static final long SHALLOW_SIZE_OF_ARRAY_LIST = shallowSizeOfInstance(ArrayList.class); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/AbstractMlTokenizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/AbstractMlTokenizer.java index c701216b1984b..d7e7683ce0071 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/AbstractMlTokenizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/AbstractMlTokenizer.java @@ -16,8 +16,11 @@ public abstract class AbstractMlTokenizer extends Tokenizer { + @SuppressWarnings("this-escape") protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + @SuppressWarnings("this-escape") protected final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + @SuppressWarnings("this-escape") protected final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); /** diff --git a/x-pack/plugin/security/qa/multi-cluster/build.gradle b/x-pack/plugin/security/qa/multi-cluster/build.gradle index 625b6806ab520..c7b8f81bb7876 100644 --- a/x-pack/plugin/security/qa/multi-cluster/build.gradle +++ b/x-pack/plugin/security/qa/multi-cluster/build.gradle @@ -23,6 +23,8 @@ dependencies { // esql with enrich clusterModules project(':x-pack:plugin:esql') clusterModules project(':x-pack:plugin:enrich') + clusterModules project(':x-pack:plugin:autoscaling') + clusterModules project(':x-pack:plugin:ml') clusterModules(project(":modules:ingest-common")) } diff --git a/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityEsqlIT.java b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityEsqlIT.java index f5f9410a145cc..1a236ccb6aa06 100644 --- a/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityEsqlIT.java +++ b/x-pack/plugin/security/qa/multi-cluster/src/javaRestTest/java/org/elasticsearch/xpack/remotecluster/RemoteClusterSecurityEsqlIT.java @@ -56,11 +56,14 @@ public class RemoteClusterSecurityEsqlIT extends AbstractRemoteClusterSecurityTe fulfillingCluster = ElasticsearchCluster.local() .name("fulfilling-cluster") .nodes(3) + .module("x-pack-autoscaling") .module("x-pack-esql") .module("x-pack-enrich") + .module("x-pack-ml") .module("ingest-common") .apply(commonClusterConfig) .setting("remote_cluster.port", "0") + .setting("xpack.ml.enabled", "false") .setting("xpack.security.remote_cluster_server.ssl.enabled", () -> String.valueOf(SSL_ENABLED_REF.get())) .setting("xpack.security.remote_cluster_server.ssl.key", "remote-cluster.key") .setting("xpack.security.remote_cluster_server.ssl.certificate", "remote-cluster.crt") @@ -73,10 +76,13 @@ public class RemoteClusterSecurityEsqlIT extends AbstractRemoteClusterSecurityTe queryCluster = ElasticsearchCluster.local() .name("query-cluster") + .module("x-pack-autoscaling") .module("x-pack-esql") .module("x-pack-enrich") + .module("x-pack-ml") .module("ingest-common") .apply(commonClusterConfig) + .setting("xpack.ml.enabled", "false") .setting("xpack.security.remote_cluster_client.ssl.enabled", () -> String.valueOf(SSL_ENABLED_REF.get())) .setting("xpack.security.remote_cluster_client.ssl.certificate_authorities", "remote-cluster-ca.crt") .setting("xpack.security.authc.token.enabled", "true") diff --git a/x-pack/qa/multi-cluster-search-security/legacy-with-basic-license/build.gradle b/x-pack/qa/multi-cluster-search-security/legacy-with-basic-license/build.gradle index b5b8495870259..6d41c4eddf31c 100644 --- a/x-pack/qa/multi-cluster-search-security/legacy-with-basic-license/build.gradle +++ b/x-pack/qa/multi-cluster-search-security/legacy-with-basic-license/build.gradle @@ -23,11 +23,14 @@ def fulfillingCluster = testClusters.register('fulfilling-cluster') { module ':modules:data-streams' module ':x-pack:plugin:mapper-constant-keyword' module ':x-pack:plugin:async-search' + module ':x-pack:plugin:autoscaling' module ':x-pack:plugin:esql-core' module ':x-pack:plugin:esql' + module ':x-pack:plugin:ml' module ':modules:ingest-common' module ':x-pack:plugin:enrich' user username: "test_user", password: "x-pack-test-password" + setting 'xpack.ml.enabled', 'false' } def queryingCluster = testClusters.register('querying-cluster') { @@ -38,13 +41,15 @@ def queryingCluster = testClusters.register('querying-cluster') { module ':modules:data-streams' module ':x-pack:plugin:mapper-constant-keyword' module ':x-pack:plugin:async-search' + module ':x-pack:plugin:autoscaling' module ':x-pack:plugin:esql-core' module ':x-pack:plugin:esql' + module ':x-pack:plugin:ml' module ':modules:ingest-common' module ':x-pack:plugin:enrich' setting 'cluster.remote.connections_per_cluster', "1" user username: "test_user", password: "x-pack-test-password" - + setting 'xpack.ml.enabled', 'false' setting 'cluster.remote.my_remote_cluster.skip_unavailable', 'false' if (proxyMode) { setting 'cluster.remote.my_remote_cluster.mode', 'proxy'