diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java index 5244432a89407..2e7914e64abdb 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/MachineLearningClient.java @@ -19,6 +19,10 @@ package org.elasticsearch.client; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.protocol.xpack.ml.DeleteJobRequest; +import org.elasticsearch.protocol.xpack.ml.DeleteJobResponse; +import org.elasticsearch.protocol.xpack.ml.OpenJobRequest; +import org.elasticsearch.protocol.xpack.ml.OpenJobResponse; import org.elasticsearch.protocol.xpack.ml.PutJobRequest; import org.elasticsearch.protocol.xpack.ml.PutJobResponse; @@ -77,4 +81,89 @@ public void putJobAsync(PutJobRequest request, RequestOptions options, ActionLis listener, Collections.emptySet()); } + + /** + * Deletes the given Machine Learning Job + *
<p>
+     * For additional info
+     * see ML Delete Job documentation
+     * </p>
+     * @param request the request to delete the job
+     * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
+     * @return action acknowledgement
+     * @throws IOException when there is a serialization issue sending the request or receiving the response
+     */
+    public DeleteJobResponse deleteJob(DeleteJobRequest request, RequestOptions options) throws IOException {
+        return restHighLevelClient.performRequestAndParseEntity(request,
+            RequestConverters::deleteMachineLearningJob,
+            options,
+            DeleteJobResponse::fromXContent,
+            Collections.emptySet());
+    }
+
+    /**
+     * Deletes the given Machine Learning Job asynchronously and notifies the listener on completion
+     * <p>
+     * For additional info
+     * see ML Delete Job documentation
+     * </p>
+     * @param request the request to delete the job
+     * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
+     * @param listener Listener to be notified upon request completion
+     */
+    public void deleteJobAsync(DeleteJobRequest request, RequestOptions options, ActionListener<DeleteJobResponse> listener) {
+        restHighLevelClient.performRequestAsyncAndParseEntity(request,
+            RequestConverters::deleteMachineLearningJob,
+            options,
+            DeleteJobResponse::fromXContent,
+            listener,
+            Collections.emptySet());
+    }
+
+    /**
+     * Opens a Machine Learning Job.
+     * When you open a new job, it starts with an empty model.
+     *
+     * When you open an existing job, the most recent model state is automatically loaded.
+     * The job is ready to resume its analysis from where it left off, once new data is received.
+     *
+     * <p>
+     * For additional info
+     * see ML Open Job documentation
+     * </p>
+     * @param request request containing job_id and additional optional options
+     * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
+     * @return response indicating whether the job was successfully opened
+     * @throws IOException when there is a serialization issue sending the request or receiving the response
+     */
+    public OpenJobResponse openJob(OpenJobRequest request, RequestOptions options) throws IOException {
+        return restHighLevelClient.performRequestAndParseEntity(request,
+            RequestConverters::machineLearningOpenJob,
+            options,
+            OpenJobResponse::fromXContent,
+            Collections.emptySet());
+    }
+
+    /**
+     * Opens a Machine Learning Job asynchronously and notifies the listener on completion.
+     * When you open a new job, it starts with an empty model.
+     *
+     * When you open an existing job, the most recent model state is automatically loaded.
+     * The job is ready to resume its analysis from where it left off, once new data is received.
+     * <p>
+     * For additional info
+     * see ML Open Job documentation
+     * </p>
+     * @param request request containing job_id and additional optional options
+     * @param options Additional request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
+     * @param listener Listener to be notified upon request completion
+     */
+    public void openJobAsync(OpenJobRequest request, RequestOptions options, ActionListener<OpenJobResponse> listener) {
+        restHighLevelClient.performRequestAsyncAndParseEntity(request,
+            RequestConverters::machineLearningOpenJob,
+            options,
+            OpenJobResponse::fromXContent,
+            listener,
+            Collections.emptySet());
+    }
 }
diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/RequestConverters.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/RequestConverters.java
index 45c70593fe826..c40b4893e0146 100644
--- a/client/rest-high-level/src/main/java/org/elasticsearch/client/RequestConverters.java
+++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/RequestConverters.java
@@ -112,6 +112,8 @@
 import org.elasticsearch.protocol.xpack.license.GetLicenseRequest;
 import org.elasticsearch.protocol.xpack.license.PutLicenseRequest;
 import org.elasticsearch.protocol.xpack.migration.IndexUpgradeInfoRequest;
+import org.elasticsearch.protocol.xpack.ml.DeleteJobRequest;
+import org.elasticsearch.protocol.xpack.ml.OpenJobRequest;
 import org.elasticsearch.protocol.xpack.ml.PutJobRequest;
 import org.elasticsearch.protocol.xpack.watcher.DeleteWatchRequest;
 import org.elasticsearch.protocol.xpack.watcher.PutWatchRequest;
@@ -1210,6 +1212,34 @@ static Request putMachineLearningJob(PutJobRequest putJobRequest) throws IOExcep
         return request;
     }
 
+    static Request deleteMachineLearningJob(DeleteJobRequest deleteJobRequest) {
+        String endpoint = new EndpointBuilder()
+            .addPathPartAsIs("_xpack")
+            .addPathPartAsIs("ml")
+            .addPathPartAsIs("anomaly_detectors")
+            .addPathPart(deleteJobRequest.getJobId())
+            .build();
+        Request request = new Request(HttpDelete.METHOD_NAME, endpoint);
+
+        Params params = new Params(request);
+        params.putParam("force", Boolean.toString(deleteJobRequest.isForce()));
+
+        return request;
+    }
+
+    static Request machineLearningOpenJob(OpenJobRequest openJobRequest) throws IOException {
+        String endpoint = new EndpointBuilder()
+            .addPathPartAsIs("_xpack")
+            .addPathPartAsIs("ml")
+            .addPathPartAsIs("anomaly_detectors")
+            .addPathPart(openJobRequest.getJobId())
+            .addPathPartAsIs("_open")
+            .build();
+        Request request = new Request(HttpPost.METHOD_NAME, endpoint);
+        request.setJsonEntity(openJobRequest.toString());
+        return request;
+    }
+
     static Request getMigrationAssistance(IndexUpgradeInfoRequest indexUpgradeInfoRequest) {
         EndpointBuilder endpointBuilder = new EndpointBuilder()
             .addPathPartAsIs("_xpack/migration/assistance")
diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java
index f86eb5b5dca87..0037460150f1a 100644
--- a/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java
+++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/MachineLearningIT.java
@@ -20,6 +20,10 @@
 import com.carrotsearch.randomizedtesting.generators.CodepointSetGenerator;
 import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.protocol.xpack.ml.DeleteJobRequest;
+import org.elasticsearch.protocol.xpack.ml.DeleteJobResponse;
+import org.elasticsearch.protocol.xpack.ml.OpenJobRequest;
+import
org.elasticsearch.protocol.xpack.ml.OpenJobResponse; import org.elasticsearch.protocol.xpack.ml.PutJobRequest; import org.elasticsearch.protocol.xpack.ml.PutJobResponse; import org.elasticsearch.protocol.xpack.ml.job.config.AnalysisConfig; @@ -46,12 +50,37 @@ public void testPutJob() throws Exception { assertThat(createdJob.getJobType(), is(Job.ANOMALY_DETECTOR_JOB_TYPE)); } + public void testDeleteJob() throws Exception { + String jobId = randomValidJobId(); + Job job = buildJob(jobId); + MachineLearningClient machineLearningClient = highLevelClient().machineLearning(); + machineLearningClient.putJob(new PutJobRequest(job), RequestOptions.DEFAULT); + + DeleteJobResponse response = execute(new DeleteJobRequest(jobId), + machineLearningClient::deleteJob, + machineLearningClient::deleteJobAsync); + + assertTrue(response.isAcknowledged()); + } + + public void testOpenJob() throws Exception { + String jobId = randomValidJobId(); + Job job = buildJob(jobId); + MachineLearningClient machineLearningClient = highLevelClient().machineLearning(); + + machineLearningClient.putJob(new PutJobRequest(job), RequestOptions.DEFAULT); + + OpenJobResponse response = execute(new OpenJobRequest(jobId), machineLearningClient::openJob, machineLearningClient::openJobAsync); + + assertTrue(response.isOpened()); + } + public static String randomValidJobId() { CodepointSetGenerator generator = new CodepointSetGenerator("abcdefghijklmnopqrstuvwxyz0123456789".toCharArray()); return generator.ofCodePointsLength(random(), 10, 10); } - private static Job buildJob(String jobId) { + public static Job buildJob(String jobId) { Job.Builder builder = new Job.Builder(jobId); builder.setDescription(randomAlphaOfLength(10)); diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/RequestConvertersTests.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/RequestConvertersTests.java index 47195f0bb2aba..786cb94f8926d 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/RequestConvertersTests.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/RequestConvertersTests.java @@ -127,6 +127,8 @@ import org.elasticsearch.index.rankeval.RestRankEvalAction; import org.elasticsearch.protocol.xpack.XPackInfoRequest; import org.elasticsearch.protocol.xpack.migration.IndexUpgradeInfoRequest; +import org.elasticsearch.protocol.xpack.ml.DeleteJobRequest; +import org.elasticsearch.protocol.xpack.ml.OpenJobRequest; import org.elasticsearch.protocol.xpack.watcher.DeleteWatchRequest; import org.elasticsearch.protocol.xpack.watcher.PutWatchRequest; import org.elasticsearch.repositories.fs.FsRepository; @@ -2610,6 +2612,33 @@ public void testXPackDeleteWatch() { assertThat(request.getEntity(), nullValue()); } + public void testDeleteMachineLearningJob() { + String jobId = randomAlphaOfLength(10); + DeleteJobRequest deleteJobRequest = new DeleteJobRequest(jobId); + + Request request = RequestConverters.deleteMachineLearningJob(deleteJobRequest); + assertEquals(HttpDelete.METHOD_NAME, request.getMethod()); + assertEquals("/_xpack/ml/anomaly_detectors/" + jobId, request.getEndpoint()); + assertEquals(Boolean.toString(false), request.getParameters().get("force")); + + deleteJobRequest.setForce(true); + request = RequestConverters.deleteMachineLearningJob(deleteJobRequest); + assertEquals(Boolean.toString(true), request.getParameters().get("force")); + } + + public void testPostMachineLearningOpenJob() throws Exception { + String jobId = "some-job-id"; + OpenJobRequest 
openJobRequest = new OpenJobRequest(jobId); + openJobRequest.setTimeout(TimeValue.timeValueMinutes(10)); + + Request request = RequestConverters.machineLearningOpenJob(openJobRequest); + assertEquals(HttpPost.METHOD_NAME, request.getMethod()); + assertEquals("/_xpack/ml/anomaly_detectors/" + jobId + "/_open", request.getEndpoint()); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + request.getEntity().writeTo(bos); + assertEquals(bos.toString("UTF-8"), "{\"job_id\":\""+ jobId +"\",\"timeout\":\"10m\"}"); + } + /** * Randomize the {@link FetchSourceContext} request parameters. */ diff --git a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java index 97bee81393864..a77d8b43e5737 100644 --- a/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java +++ b/client/rest-high-level/src/test/java/org/elasticsearch/client/documentation/MlClientDocumentationIT.java @@ -21,9 +21,14 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.LatchedActionListener; import org.elasticsearch.client.ESRestHighLevelClientTestCase; +import org.elasticsearch.client.MachineLearningIT; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.RestHighLevelClient; import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.protocol.xpack.ml.DeleteJobRequest; +import org.elasticsearch.protocol.xpack.ml.DeleteJobResponse; +import org.elasticsearch.protocol.xpack.ml.OpenJobRequest; +import org.elasticsearch.protocol.xpack.ml.OpenJobResponse; import org.elasticsearch.protocol.xpack.ml.PutJobRequest; import org.elasticsearch.protocol.xpack.ml.PutJobResponse; import org.elasticsearch.protocol.xpack.ml.job.config.AnalysisConfig; @@ -118,4 +123,102 @@ public void onFailure(Exception e) { assertTrue(latch.await(30L, TimeUnit.SECONDS)); } } + + public void testDeleteJob() throws Exception { + RestHighLevelClient client = highLevelClient(); + + String jobId = "my-first-machine-learning-job"; + + Job job = MachineLearningIT.buildJob(jobId); + client.machineLearning().putJob(new PutJobRequest(job), RequestOptions.DEFAULT); + + Job secondJob = MachineLearningIT.buildJob("my-second-machine-learning-job"); + client.machineLearning().putJob(new PutJobRequest(secondJob), RequestOptions.DEFAULT); + + { + //tag::x-pack-delete-ml-job-request + DeleteJobRequest deleteJobRequest = new DeleteJobRequest("my-first-machine-learning-job"); + deleteJobRequest.setForce(false); //<1> + DeleteJobResponse deleteJobResponse = client.machineLearning().deleteJob(deleteJobRequest, RequestOptions.DEFAULT); + //end::x-pack-delete-ml-job-request + + //tag::x-pack-delete-ml-job-response + boolean isAcknowledged = deleteJobResponse.isAcknowledged(); //<1> + //end::x-pack-delete-ml-job-response + } + { + //tag::x-pack-delete-ml-job-request-listener + ActionListener listener = new ActionListener() { + @Override + public void onResponse(DeleteJobResponse deleteJobResponse) { + // <1> + } + + @Override + public void onFailure(Exception e) { + // <2> + } + }; + //end::x-pack-delete-ml-job-request-listener + + // Replace the empty listener by a blocking listener in test + final CountDownLatch latch = new CountDownLatch(1); + listener = new LatchedActionListener<>(listener, latch); + + //tag::x-pack-delete-ml-job-request-async + DeleteJobRequest deleteJobRequest = new 
DeleteJobRequest("my-second-machine-learning-job"); + client.machineLearning().deleteJobAsync(deleteJobRequest, RequestOptions.DEFAULT, listener); // <1> + //end::x-pack-delete-ml-job-request-async + + assertTrue(latch.await(30L, TimeUnit.SECONDS)); + } + } + + public void testOpenJob() throws Exception { + RestHighLevelClient client = highLevelClient(); + + Job job = MachineLearningIT.buildJob("opening-my-first-machine-learning-job"); + client.machineLearning().putJob(new PutJobRequest(job), RequestOptions.DEFAULT); + + Job secondJob = MachineLearningIT.buildJob("opening-my-second-machine-learning-job"); + client.machineLearning().putJob(new PutJobRequest(secondJob), RequestOptions.DEFAULT); + + { + //tag::x-pack-ml-open-job-request + OpenJobRequest openJobRequest = new OpenJobRequest("opening-my-first-machine-learning-job"); //<1> + openJobRequest.setTimeout(TimeValue.timeValueMinutes(10)); //<2> + //end::x-pack-ml-open-job-request + + //tag::x-pack-ml-open-job-execute + OpenJobResponse openJobResponse = client.machineLearning().openJob(openJobRequest, RequestOptions.DEFAULT); + boolean isOpened = openJobResponse.isOpened(); //<1> + //end::x-pack-ml-open-job-execute + + } + { + //tag::x-pack-ml-open-job-listener + ActionListener listener = new ActionListener() { + @Override + public void onResponse(OpenJobResponse openJobResponse) { + //<1> + } + + @Override + public void onFailure(Exception e) { + // <2> + } + }; + //end::x-pack-ml-open-job-listener + OpenJobRequest openJobRequest = new OpenJobRequest("opening-my-second-machine-learning-job"); + // Replace the empty listener by a blocking listener in test + final CountDownLatch latch = new CountDownLatch(1); + listener = new LatchedActionListener<>(listener, latch); + + // tag::x-pack-ml-open-job-execute-async + client.machineLearning().openJobAsync(openJobRequest, RequestOptions.DEFAULT, listener); //<1> + // end::x-pack-ml-open-job-execute-async + + assertTrue(latch.await(30L, TimeUnit.SECONDS)); + } + } } diff --git a/docs/java-rest/high-level/ml/delete-job.asciidoc b/docs/java-rest/high-level/ml/delete-job.asciidoc new file mode 100644 index 0000000000000..44a6a47940955 --- /dev/null +++ b/docs/java-rest/high-level/ml/delete-job.asciidoc @@ -0,0 +1,49 @@ +[[java-rest-high-x-pack-ml-delete-job]] +=== Delete Job API + +[[java-rest-high-x-pack-machine-learning-delete-job-request]] +==== Delete Job Request + +A `DeleteJobRequest` object requires a non-null `jobId` and can optionally set `force`. +Can be executed as follows: + +["source","java",subs="attributes,callouts,macros"] +--------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-delete-ml-job-request] +--------------------------------------------------- +<1> Use to forcefully delete an opened job; +this method is quicker than closing and deleting the job. +Defaults to `false` + +[[java-rest-high-x-pack-machine-learning-delete-job-response]] +==== Delete Job Response + +The returned `DeleteJobResponse` object indicates the acknowledgement of the request: +["source","java",subs="attributes,callouts,macros"] +--------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-delete-ml-job-response] +--------------------------------------------------- +<1> `isAcknowledged` was the deletion request acknowledged or not + +[[java-rest-high-x-pack-machine-learning-delete-job-async]] +==== Delete Job Asynchronously + +This request can also be made asynchronously. 
+["source","java",subs="attributes,callouts,macros"] +--------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-delete-ml-job-request-async] +--------------------------------------------------- +<1> The `DeleteJobRequest` to execute and the `ActionListener` to alert on completion or error. + +The deletion request returns immediately. Once the request is completed, the `ActionListener` is +called back using the `onResponse` or `onFailure`. The latter indicates some failure occurred when +making the request. + +A typical listener for a `DeleteJobRequest` could be defined as follows: + +["source","java",subs="attributes,callouts,macros"] +--------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-delete-ml-job-request-listener] +--------------------------------------------------- +<1> The action to be taken when it is completed +<2> What to do when a failure occurs diff --git a/docs/java-rest/high-level/ml/open-job.asciidoc b/docs/java-rest/high-level/ml/open-job.asciidoc new file mode 100644 index 0000000000000..ad575121818bc --- /dev/null +++ b/docs/java-rest/high-level/ml/open-job.asciidoc @@ -0,0 +1,55 @@ +[[java-rest-high-x-pack-ml-open-job]] +=== Open Job API + +The Open Job API provides the ability to open {ml} jobs in the cluster. +It accepts a `OpenJobRequest` object and responds +with a `OpenJobResponse` object. + +[[java-rest-high-x-pack-ml-open-job-request]] +==== Open Job Request + +An `OpenJobRequest` object gets created with an existing non-null `jobId`. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-ml-open-job-request] +-------------------------------------------------- +<1> Constructing a new request referencing an existing `jobId` +<2> Optionally setting the `timeout` value for how long the +execution should wait for the job to be opened. + +[[java-rest-high-x-pack-ml-open-job-execution]] +==== Execution + +The request can be executed through the `MachineLearningClient` contained +in the `RestHighLevelClient` object, accessed via the `machineLearningClient()` method. + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-ml-open-job-execute] +-------------------------------------------------- +<1> `isOpened()` from the `OpenJobResponse` indicates if the job was successfully +opened or not. + +[[java-rest-high-x-pack-ml-open-job-execution-async]] +==== Asynchronous Execution + +The request can also be executed asynchronously: + +["source","java",subs="attributes,callouts,macros"] +-------------------------------------------------- +include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-ml-open-job-execute-async] +-------------------------------------------------- +<1> The `OpenJobRequest` to execute and the `ActionListener` to use when +the execution completes + +The method does not block and returns immediately. The passed `ActionListener` is used +to notify the caller of completion. 
A typical `ActionListener` for `OpenJobResponse` may look like:
+
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests}/MlClientDocumentationIT.java[x-pack-ml-open-job-listener]
+--------------------------------------------------
+<1> `onResponse` is called back when the action is completed successfully
+<2> `onFailure` is called back when some unexpected error occurs
diff --git a/docs/java-rest/high-level/ml/put_job.asciidoc b/docs/java-rest/high-level/ml/put-job.asciidoc
similarity index 100%
rename from docs/java-rest/high-level/ml/put_job.asciidoc
rename to docs/java-rest/high-level/ml/put-job.asciidoc
diff --git a/docs/java-rest/high-level/supported-apis.asciidoc b/docs/java-rest/high-level/supported-apis.asciidoc
index 808546f2c279c..6bcb736243a7c 100644
--- a/docs/java-rest/high-level/supported-apis.asciidoc
+++ b/docs/java-rest/high-level/supported-apis.asciidoc
@@ -205,8 +205,12 @@ include::licensing/delete-license.asciidoc[]
 The Java High Level REST Client supports the following Machine Learning APIs:
 
 * <<java-rest-high-x-pack-ml-put-job>>
+* <<java-rest-high-x-pack-ml-delete-job>>
+* <<java-rest-high-x-pack-ml-open-job>>
 
-include::ml/put_job.asciidoc[]
+include::ml/put-job.asciidoc[]
+include::ml/delete-job.asciidoc[]
+include::ml/open-job.asciidoc[]
 
 == Migration APIs
 
diff --git a/docs/plugins/integrations.asciidoc b/docs/plugins/integrations.asciidoc
index 90f2c685fdaeb..8bffe5193ed7b 100644
--- a/docs/plugins/integrations.asciidoc
+++ b/docs/plugins/integrations.asciidoc
@@ -17,14 +17,11 @@ Integrations are not plugins, but are external tools or modules that make it eas
 * https://drupal.org/project/elasticsearch_connector[Drupal]:
   Drupal Elasticsearch integration.
 
-* https://wordpress.org/plugins/wpsolr-search-engine/[WPSOLR]:
-  Elasticsearch (and Apache Solr) WordPress Plugin
-
-* http://searchbox-io.github.com/wp-elasticsearch/[Wp-Elasticsearch]:
+* https://wordpress.org/plugins/elasticpress/[ElasticPress]:
   Elasticsearch WordPress Plugin
 
-* https://github.com/wallmanderco/elasticsearch-indexer[Elasticsearch Indexer]:
-  Elasticsearch WordPress Plugin
+
+* https://wordpress.org/plugins/wpsolr-search-engine/[WPSOLR]:
+  Elasticsearch (and Apache Solr) WordPress Plugin
 
 * https://doc.tiki.org/Elasticsearch[Tiki Wiki CMS Groupware]:
   Tiki has native support for Elasticsearch. This provides faster & better
diff --git a/docs/reference/search/request-body.asciidoc b/docs/reference/search/request-body.asciidoc
index 2a51d705d83ec..e7c9b593af372 100644
--- a/docs/reference/search/request-body.asciidoc
+++ b/docs/reference/search/request-body.asciidoc
@@ -90,7 +90,8 @@ And here is a sample response:
     Set to `false` to return an overall failure if the request would produce partial
     results. Defaults to true, which will allow partial results in the case of timeouts
-    or partial failures.
+    or partial failures. This default can be controlled using the cluster-level setting
+    `search.default_allow_partial_results`.
 
 `terminate_after`::
 
diff --git a/docs/reference/search/uri-request.asciidoc b/docs/reference/search/uri-request.asciidoc
index a90f32bb3cd36..279bc0c0384c1 100644
--- a/docs/reference/search/uri-request.asciidoc
+++ b/docs/reference/search/uri-request.asciidoc
@@ -125,5 +125,6 @@ more details on the different types of search that can be performed.
 |`allow_partial_search_results` |Set to `false` to return an overall failure if the request would produce
 partial results. Defaults to true, which will allow partial results in the case of timeouts
-or partial failures..
+or partial failures.
This default can be controlled using the cluster-level setting +`search.default_allow_partial_results`. |======================================================================= diff --git a/docs/reference/setup/important-settings/heap-dump-path.asciidoc b/docs/reference/setup/important-settings/heap-dump-path.asciidoc index b0d301b21d0b8..fb8c7ff35f0d0 100644 --- a/docs/reference/setup/important-settings/heap-dump-path.asciidoc +++ b/docs/reference/setup/important-settings/heap-dump-path.asciidoc @@ -8,8 +8,8 @@ distributions, and the `data` directory under the root of the Elasticsearch installation for the <> archive distributions). If this path is not suitable for receiving heap dumps, you should modify the entry `-XX:HeapDumpPath=...` in -<>. If you specify a fixed filename instead -of a directory, the JVM will repeatedly use the same file; this is one -mechanism for preventing heap dumps from accumulating in the heap dump -path. Alternatively, you can configure a scheduled task via your OS to -remove heap dumps that are older than a configured age. +<>. If you specify a directory, the JVM +will generate a filename for the heap dump based on the PID of the running +instance. If you specify a fixed filename instead of a directory, the file must +not exist when the JVM needs to perform a heap dump on an out of memory +exception, otherwise the heap dump will fail. diff --git a/docs/reference/setup/important-settings/network-host.asciidoc b/docs/reference/setup/important-settings/network-host.asciidoc index 7e29e73123d8d..1788bfebc66b5 100644 --- a/docs/reference/setup/important-settings/network-host.asciidoc +++ b/docs/reference/setup/important-settings/network-host.asciidoc @@ -9,7 +9,7 @@ location on a single node. This can be useful for testing Elasticsearch's ability to form clusters, but it is not a configuration recommended for production. -In order to communicate and to form a cluster with nodes on other servers, your +In order to form a cluster with nodes on other servers, your node will need to bind to a non-loopback address. While there are many <>, usually all you need to configure is `network.host`: diff --git a/libs/core/src/main/java/org/elasticsearch/common/CharArrays.java b/libs/core/src/main/java/org/elasticsearch/common/CharArrays.java new file mode 100644 index 0000000000000..907874ca5735b --- /dev/null +++ b/libs/core/src/main/java/org/elasticsearch/common/CharArrays.java @@ -0,0 +1,150 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.common; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Objects; + +/** + * Helper class similar to Arrays to handle conversions for Char arrays + */ +public final class CharArrays { + + private CharArrays() {} + + /** + * Decodes the provided byte[] to a UTF-8 char[]. This is done while avoiding + * conversions to String. The provided byte[] is not modified by this method, so + * the caller needs to take care of clearing the value if it is sensitive. + */ + public static char[] utf8BytesToChars(byte[] utf8Bytes) { + final ByteBuffer byteBuffer = ByteBuffer.wrap(utf8Bytes); + final CharBuffer charBuffer = StandardCharsets.UTF_8.decode(byteBuffer); + final char[] chars; + if (charBuffer.hasArray()) { + // there is no guarantee that the char buffers backing array is the right size + // so we need to make a copy + chars = Arrays.copyOfRange(charBuffer.array(), charBuffer.position(), charBuffer.limit()); + Arrays.fill(charBuffer.array(), (char) 0); // clear sensitive data + } else { + final int length = charBuffer.limit() - charBuffer.position(); + chars = new char[length]; + charBuffer.get(chars); + // if the buffer is not read only we can reset and fill with 0's + if (charBuffer.isReadOnly() == false) { + charBuffer.clear(); // reset + for (int i = 0; i < charBuffer.limit(); i++) { + charBuffer.put((char) 0); + } + } + } + return chars; + } + + /** + * Encodes the provided char[] to a UTF-8 byte[]. This is done while avoiding + * conversions to String. The provided char[] is not modified by this method, so + * the caller needs to take care of clearing the value if it is sensitive. + */ + public static byte[] toUtf8Bytes(char[] chars) { + final CharBuffer charBuffer = CharBuffer.wrap(chars); + final ByteBuffer byteBuffer = StandardCharsets.UTF_8.encode(charBuffer); + final byte[] bytes; + if (byteBuffer.hasArray()) { + // there is no guarantee that the byte buffers backing array is the right size + // so we need to make a copy + bytes = Arrays.copyOfRange(byteBuffer.array(), byteBuffer.position(), byteBuffer.limit()); + Arrays.fill(byteBuffer.array(), (byte) 0); // clear sensitive data + } else { + final int length = byteBuffer.limit() - byteBuffer.position(); + bytes = new byte[length]; + byteBuffer.get(bytes); + // if the buffer is not read only we can reset and fill with 0's + if (byteBuffer.isReadOnly() == false) { + byteBuffer.clear(); // reset + for (int i = 0; i < byteBuffer.limit(); i++) { + byteBuffer.put((byte) 0); + } + } + } + return bytes; + } + + /** + * Tests if a char[] contains a sequence of characters that match the prefix. This is like + * {@link String#startsWith(String)} but does not require conversion of the char[] to a string. + */ + public static boolean charsBeginsWith(String prefix, char[] chars) { + if (chars == null || prefix == null) { + return false; + } + + if (prefix.length() > chars.length) { + return false; + } + + for (int i = 0; i < prefix.length(); i++) { + if (chars[i] != prefix.charAt(i)) { + return false; + } + } + + return true; + } + + /** + * Constant time equality check of char arrays to avoid potential timing attacks. 
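+     * <p>
+     * The XOR of each character pair is OR-ed into an accumulator instead of returning at the
+     * first mismatch, so for equal-length inputs the loop always inspects every position; for
+     * example, comparing {@code "abc"} with {@code "abd"} still visits all three characters
+     * before the single {@code equals == 0} check reveals the result.
+     * </p>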
+ */ + public static boolean constantTimeEquals(char[] a, char[] b) { + Objects.requireNonNull(a, "char arrays must not be null for constantTimeEquals"); + Objects.requireNonNull(b, "char arrays must not be null for constantTimeEquals"); + if (a.length != b.length) { + return false; + } + + int equals = 0; + for (int i = 0; i < a.length; i++) { + equals |= a[i] ^ b[i]; + } + + return equals == 0; + } + + /** + * Constant time equality check of strings to avoid potential timing attacks. + */ + public static boolean constantTimeEquals(String a, String b) { + Objects.requireNonNull(a, "strings must not be null for constantTimeEquals"); + Objects.requireNonNull(b, "strings must not be null for constantTimeEquals"); + if (a.length() != b.length()) { + return false; + } + + int equals = 0; + for (int i = 0; i < a.length(); i++) { + equals |= a.charAt(i) ^ b.charAt(i); + } + + return equals == 0; + } +} diff --git a/libs/core/src/test/java/org/elasticsearch/common/CharArraysTests.java b/libs/core/src/test/java/org/elasticsearch/common/CharArraysTests.java new file mode 100644 index 0000000000000..9283283ab0861 --- /dev/null +++ b/libs/core/src/test/java/org/elasticsearch/common/CharArraysTests.java @@ -0,0 +1,75 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.common; + +import org.elasticsearch.test.ESTestCase; + +import java.nio.charset.StandardCharsets; + +public class CharArraysTests extends ESTestCase { + + public void testCharsToBytes() { + final String originalValue = randomUnicodeOfCodepointLengthBetween(0, 32); + final byte[] expectedBytes = originalValue.getBytes(StandardCharsets.UTF_8); + final char[] valueChars = originalValue.toCharArray(); + + final byte[] convertedBytes = CharArrays.toUtf8Bytes(valueChars); + assertArrayEquals(expectedBytes, convertedBytes); + } + + public void testBytesToUtf8Chars() { + final String originalValue = randomUnicodeOfCodepointLengthBetween(0, 32); + final byte[] bytes = originalValue.getBytes(StandardCharsets.UTF_8); + final char[] expectedChars = originalValue.toCharArray(); + + final char[] convertedChars = CharArrays.utf8BytesToChars(bytes); + assertArrayEquals(expectedChars, convertedChars); + } + + public void testCharsBeginsWith() { + assertFalse(CharArrays.charsBeginsWith(randomAlphaOfLength(4), null)); + assertFalse(CharArrays.charsBeginsWith(null, null)); + assertFalse(CharArrays.charsBeginsWith(null, randomAlphaOfLength(4).toCharArray())); + assertFalse(CharArrays.charsBeginsWith(randomAlphaOfLength(2), randomAlphaOfLengthBetween(3, 8).toCharArray())); + + final String prefix = randomAlphaOfLengthBetween(2, 4); + assertTrue(CharArrays.charsBeginsWith(prefix, prefix.toCharArray())); + final char[] prefixedValue = prefix.concat(randomAlphaOfLengthBetween(1, 12)).toCharArray(); + assertTrue(CharArrays.charsBeginsWith(prefix, prefixedValue)); + + final String modifiedPrefix = randomBoolean() ? prefix.substring(1) : prefix.substring(0, prefix.length() - 1); + char[] nonMatchingValue; + do { + nonMatchingValue = modifiedPrefix.concat(randomAlphaOfLengthBetween(0, 12)).toCharArray(); + } while (new String(nonMatchingValue).startsWith(prefix)); + assertFalse(CharArrays.charsBeginsWith(prefix, nonMatchingValue)); + assertTrue(CharArrays.charsBeginsWith(modifiedPrefix, nonMatchingValue)); + } + + public void testConstantTimeEquals() { + final String value = randomAlphaOfLengthBetween(0, 32); + assertTrue(CharArrays.constantTimeEquals(value, value)); + assertTrue(CharArrays.constantTimeEquals(value.toCharArray(), value.toCharArray())); + + final String other = randomAlphaOfLengthBetween(1, 32); + assertFalse(CharArrays.constantTimeEquals(value, other)); + assertFalse(CharArrays.constantTimeEquals(value.toCharArray(), other.toCharArray())); + } +} diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/AppendProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/AppendProcessorFactoryTests.java index 39a7bfd9a20b2..d51cb368e4317 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/AppendProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/AppendProcessorFactoryTests.java @@ -100,6 +100,6 @@ public void testInvalidMustacheTemplate() throws Exception { String processorTag = randomAlphaOfLength(10); ElasticsearchException exception = expectThrows(ElasticsearchException.class, () -> factory.create(null, processorTag, config)); assertThat(exception.getMessage(), equalTo("java.lang.RuntimeException: could not compile script")); - assertThat(exception.getHeader("processor_tag").get(0), equalTo(processorTag)); + assertThat(exception.getMetadata("es.processor_tag").get(0), equalTo(processorTag)); } } diff --git 
a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/ConvertProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/ConvertProcessorFactoryTests.java index 9e4acd7b17f83..f3396da64eb5f 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/ConvertProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/ConvertProcessorFactoryTests.java @@ -58,9 +58,9 @@ public void testCreateUnsupportedType() throws Exception { fail("factory create should have failed"); } catch (ElasticsearchParseException e) { assertThat(e.getMessage(), Matchers.equalTo("[type] type [" + type + "] not supported, cannot convert field.")); - assertThat(e.getHeader("processor_type").get(0), equalTo(ConvertProcessor.TYPE)); - assertThat(e.getHeader("property_name").get(0), equalTo("type")); - assertThat(e.getHeader("processor_tag"), nullValue()); + assertThat(e.getMetadata("es.processor_type").get(0), equalTo(ConvertProcessor.TYPE)); + assertThat(e.getMetadata("es.property_name").get(0), equalTo("type")); + assertThat(e.getMetadata("es.processor_tag"), nullValue()); } } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/FailProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/FailProcessorFactoryTests.java index 801441407a7f7..3c89778f0e825 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/FailProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/FailProcessorFactoryTests.java @@ -66,6 +66,6 @@ public void testInvalidMustacheTemplate() throws Exception { String processorTag = randomAlphaOfLength(10); ElasticsearchException exception = expectThrows(ElasticsearchException.class, () -> factory.create(null, processorTag, config)); assertThat(exception.getMessage(), equalTo("java.lang.RuntimeException: could not compile script")); - assertThat(exception.getHeader("processor_tag").get(0), equalTo(processorTag)); + assertThat(exception.getMetadata("es.processor_tag").get(0), equalTo(processorTag)); } } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RemoveProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RemoveProcessorFactoryTests.java index c439a9662f202..bebe780276208 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RemoveProcessorFactoryTests.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/RemoveProcessorFactoryTests.java @@ -79,6 +79,6 @@ public void testInvalidMustacheTemplate() throws Exception { String processorTag = randomAlphaOfLength(10); ElasticsearchException exception = expectThrows(ElasticsearchException.class, () -> factory.create(null, processorTag, config)); assertThat(exception.getMessage(), equalTo("java.lang.RuntimeException: could not compile script")); - assertThat(exception.getHeader("processor_tag").get(0), equalTo(processorTag)); + assertThat(exception.getMetadata("es.processor_tag").get(0), equalTo(processorTag)); } } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SetProcessorFactoryTests.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SetProcessorFactoryTests.java index 59a99b8f995d8..9602f34f698f7 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SetProcessorFactoryTests.java +++ 
b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SetProcessorFactoryTests.java @@ -108,7 +108,7 @@ public void testInvalidMustacheTemplate() throws Exception { String processorTag = randomAlphaOfLength(10); ElasticsearchException exception = expectThrows(ElasticsearchException.class, () -> factory.create(null, processorTag, config)); assertThat(exception.getMessage(), equalTo("java.lang.RuntimeException: could not compile script")); - assertThat(exception.getHeader("processor_tag").get(0), equalTo(processorTag)); + assertThat(exception.getMetadata("es.processor_tag").get(0), equalTo(processorTag)); } } diff --git a/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/20_crud.yml b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/20_crud.yml index 0e348bbd7265d..bd6a3e6ca14fd 100644 --- a/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/20_crud.yml +++ b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/20_crud.yml @@ -158,9 +158,9 @@ teardown: } - match: { error.root_cause.0.type: "parse_exception" } - match: { error.root_cause.0.reason: "[field] required property is missing" } - - match: { error.root_cause.0.header.processor_tag: "fritag" } - - match: { error.root_cause.0.header.processor_type: "set" } - - match: { error.root_cause.0.header.property_name: "field" } + - match: { error.root_cause.0.processor_tag: "fritag" } + - match: { error.root_cause.0.processor_type: "set" } + - match: { error.root_cause.0.property_name: "field" } --- "Test basic pipeline with on_failure in processor": diff --git a/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/50_on_failure.yml b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/50_on_failure.yml index 4b40d9f670bfe..718b91ac1c111 100644 --- a/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/50_on_failure.yml +++ b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/50_on_failure.yml @@ -148,9 +148,9 @@ teardown: } - match: { error.root_cause.0.type: "parse_exception" } - match: { error.root_cause.0.reason: "[on_failure] processors list cannot be empty" } - - match: { error.root_cause.0.header.processor_type: "fail" } - - match: { error.root_cause.0.header.processor_tag: "emptyfail" } - - match: { error.root_cause.0.header.property_name: "on_failure" } + - match: { error.root_cause.0.processor_type: "fail" } + - match: { error.root_cause.0.processor_tag: "emptyfail" } + - match: { error.root_cause.0.property_name: "on_failure" } --- "Test pipeline with empty on_failure in pipeline": diff --git a/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/90_simulate.yml b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/90_simulate.yml index 8b3ed313314bb..776a8af0c2420 100644 --- a/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/90_simulate.yml +++ b/modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/90_simulate.yml @@ -107,9 +107,9 @@ teardown: } - match: { error.root_cause.0.type: "parse_exception" } - match: { error.root_cause.0.reason: "[field] required property is missing" } - - match: { error.root_cause.0.header.processor_tag: "fails" } - - match: { error.root_cause.0.header.processor_type: "set" } - - match: { error.root_cause.0.header.property_name: "field" } + - match: { error.root_cause.0.processor_tag: "fails" } + - match: { error.root_cause.0.processor_type: "set" } + - match: { error.root_cause.0.property_name: "field" } --- 
"Test simulate without index type and id": @@ -198,9 +198,9 @@ teardown: } ] } - - is_false: error.root_cause.0.header.processor_type - - is_false: error.root_cause.0.header.processor_tag - - match: { error.root_cause.0.header.property_name: "pipeline" } + - is_false: error.root_cause.0.processor_type + - is_false: error.root_cause.0.processor_tag + - match: { error.root_cause.0.property_name: "pipeline" } - match: { error.reason: "[pipeline] required property is missing" } --- @@ -233,9 +233,9 @@ teardown: } - match: { error.root_cause.0.type: "parse_exception" } - match: { error.root_cause.0.reason: "[value] required property is missing" } - - match: { error.root_cause.0.header.processor_type: "set" } - - match: { error.root_cause.0.header.property_name: "value" } - - is_false: error.root_cause.0.header.processor_tag + - match: { error.root_cause.0.processor_type: "set" } + - match: { error.root_cause.0.property_name: "value" } + - is_false: error.root_cause.0.processor_tag --- "Test simulate with verbose flag": diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/ScriptClassInfo.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/ScriptClassInfo.java index 345db46f8875f..7de8353194dda 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/ScriptClassInfo.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/ScriptClassInfo.java @@ -21,6 +21,7 @@ import org.elasticsearch.painless.lookup.PainlessLookup; import org.elasticsearch.painless.lookup.PainlessLookupUtility; +import org.elasticsearch.painless.lookup.def; import java.lang.invoke.MethodType; import java.lang.reflect.Field; @@ -190,7 +191,7 @@ private static Class definitionTypeForClass(PainlessLookup painlessLookup, Cl componentType = componentType.getComponentType(); } - if (painlessLookup.lookupPainlessClass(componentType) == null) { + if (componentType != def.class && painlessLookup.lookupPainlessClass(componentType) == null) { throw new IllegalArgumentException(unknownErrorMessageSource.apply(componentType)); } diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/lookup/PainlessLookup.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/lookup/PainlessLookup.java index 16b8ac14f14f2..55855a3cb1efb 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/lookup/PainlessLookup.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/lookup/PainlessLookup.java @@ -26,6 +26,7 @@ import java.util.Set; import java.util.function.Function; +import static org.elasticsearch.painless.lookup.PainlessLookupUtility.DEF_CLASS_NAME; import static org.elasticsearch.painless.lookup.PainlessLookupUtility.buildPainlessConstructorKey; import static org.elasticsearch.painless.lookup.PainlessLookupUtility.buildPainlessFieldKey; import static org.elasticsearch.painless.lookup.PainlessLookupUtility.buildPainlessMethodKey; @@ -47,7 +48,7 @@ public final class PainlessLookup { public boolean isValidCanonicalClassName(String canonicalClassName) { Objects.requireNonNull(canonicalClassName); - return canonicalClassNamesToClasses.containsKey(canonicalClassName); + return DEF_CLASS_NAME.equals(canonicalClassName) || canonicalClassNamesToClasses.containsKey(canonicalClassName); } public Class canonicalTypeNameToType(String canonicalTypeName) { diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/lookup/PainlessLookupBuilder.java 
b/modules/lang-painless/src/main/java/org/elasticsearch/painless/lookup/PainlessLookupBuilder.java index e644453a4c1ba..c8353b54c9f44 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/lookup/PainlessLookupBuilder.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/lookup/PainlessLookupBuilder.java @@ -211,9 +211,6 @@ public static PainlessLookup buildFromWhitelists(List whitelists) { public PainlessLookupBuilder() { canonicalClassNamesToClasses = new HashMap<>(); classesToPainlessClassBuilders = new HashMap<>(); - - canonicalClassNamesToClasses.put(DEF_CLASS_NAME, def.class); - classesToPainlessClassBuilders.put(def.class, new PainlessClassBuilder()); } private Class canonicalTypeNameToType(String canonicalTypeName) { @@ -225,7 +222,7 @@ private boolean isValidType(Class type) { type = type.getComponentType(); } - return classesToPainlessClassBuilders.containsKey(type); + return type == def.class || classesToPainlessClassBuilders.containsKey(type); } public void addPainlessClass(ClassLoader classLoader, String javaClassName, boolean importClassName) { diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/lookup/PainlessLookupUtility.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/lookup/PainlessLookupUtility.java index f2eb434516961..71cacab9eba9d 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/lookup/PainlessLookupUtility.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/lookup/PainlessLookupUtility.java @@ -82,7 +82,7 @@ public static Class canonicalTypeNameToType(String canonicalTypeName, Map type = canonicalClassNamesToClasses.get(canonicalTypeName); + Class type = DEF_CLASS_NAME.equals(canonicalTypeName) ? def.class : canonicalClassNamesToClasses.get(canonicalTypeName); if (type != null) { return type; @@ -105,7 +105,7 @@ public static Class canonicalTypeNameToType(String canonicalTypeName, Map { - /** - * The password which is broadcasted to all nodes, but is never stored on - * persistent storage. The password is used to reread and decrypt the contents - * of the node's keystore (backing the implementation of - * {@code SecureSettings}). - */ - private SecureString secureSettingsPassword; - public NodesReloadSecureSettingsRequest() { } /** - * Reload secure settings only on certain nodes, based on the nodes ids - * specified. If none are passed, secure settings will be reloaded on all the - * nodes. + * Reload secure settings only on certain nodes, based on the nodes IDs specified. If none are passed, secure settings will be reloaded + * on all the nodes. */ - public NodesReloadSecureSettingsRequest(String... nodesIds) { + public NodesReloadSecureSettingsRequest(final String... 
nodesIds) { super(nodesIds); } - @Override - public ActionRequestValidationException validate() { - ActionRequestValidationException validationException = null; - if (secureSettingsPassword == null) { - validationException = addValidationError("secure settings password cannot be null (use empty string instead)", - validationException); - } - return validationException; - } - - public SecureString secureSettingsPassword() { - return secureSettingsPassword; - } - - public NodesReloadSecureSettingsRequest secureStorePassword(SecureString secureStorePassword) { - this.secureSettingsPassword = secureStorePassword; - return this; - } - - @Override - public void readFrom(StreamInput in) throws IOException { - super.readFrom(in); - final byte[] passwordBytes = in.readByteArray(); - try { - this.secureSettingsPassword = new SecureString(utf8BytesToChars(passwordBytes)); - } finally { - Arrays.fill(passwordBytes, (byte) 0); - } - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); - final byte[] passwordBytes = charsToUtf8Bytes(this.secureSettingsPassword.getChars()); - try { - out.writeByteArray(passwordBytes); - } finally { - Arrays.fill(passwordBytes, (byte) 0); - } - } - - /** - * Encodes the provided char[] to a UTF-8 byte[]. This is done while avoiding - * conversions to String. The provided char[] is not modified by this method, so - * the caller needs to take care of clearing the value if it is sensitive. - */ - private static byte[] charsToUtf8Bytes(char[] chars) { - final CharBuffer charBuffer = CharBuffer.wrap(chars); - final ByteBuffer byteBuffer = StandardCharsets.UTF_8.encode(charBuffer); - final byte[] bytes; - if (byteBuffer.hasArray()) { - // there is no guarantee that the byte buffers backing array is the right size - // so we need to make a copy - bytes = Arrays.copyOfRange(byteBuffer.array(), byteBuffer.position(), byteBuffer.limit()); - Arrays.fill(byteBuffer.array(), (byte) 0); // clear sensitive data - } else { - final int length = byteBuffer.limit() - byteBuffer.position(); - bytes = new byte[length]; - byteBuffer.get(bytes); - // if the buffer is not read only we can reset and fill with 0's - if (byteBuffer.isReadOnly() == false) { - byteBuffer.clear(); // reset - for (int i = 0; i < byteBuffer.limit(); i++) { - byteBuffer.put((byte) 0); - } - } - } - return bytes; - } - - /** - * Decodes the provided byte[] to a UTF-8 char[]. This is done while avoiding - * conversions to String. The provided byte[] is not modified by this method, so - * the caller needs to take care of clearing the value if it is sensitive. 
- */ - public static char[] utf8BytesToChars(byte[] utf8Bytes) { - final ByteBuffer byteBuffer = ByteBuffer.wrap(utf8Bytes); - final CharBuffer charBuffer = StandardCharsets.UTF_8.decode(byteBuffer); - final char[] chars; - if (charBuffer.hasArray()) { - // there is no guarantee that the char buffers backing array is the right size - // so we need to make a copy - chars = Arrays.copyOfRange(charBuffer.array(), charBuffer.position(), charBuffer.limit()); - Arrays.fill(charBuffer.array(), (char) 0); // clear sensitive data - } else { - final int length = charBuffer.limit() - charBuffer.position(); - chars = new char[length]; - charBuffer.get(chars); - // if the buffer is not read only we can reset and fill with 0's - if (charBuffer.isReadOnly() == false) { - charBuffer.clear(); // reset - for (int i = 0; i < charBuffer.limit(); i++) { - charBuffer.put((char) 0); - } - } - } - return chars; - } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/NodesReloadSecureSettingsRequestBuilder.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/NodesReloadSecureSettingsRequestBuilder.java index b5f2f73e56f51..c8250455e6ba3 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/NodesReloadSecureSettingsRequestBuilder.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/NodesReloadSecureSettingsRequestBuilder.java @@ -19,19 +19,8 @@ package org.elasticsearch.action.admin.cluster.node.reload; -import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.action.support.nodes.NodesOperationRequestBuilder; import org.elasticsearch.client.ElasticsearchClient; -import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.settings.SecureString; -import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; -import org.elasticsearch.common.xcontent.NamedXContentRegistry; -import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.common.xcontent.XContentType; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Objects; /** * Builder for the reload secure settings nodes request @@ -39,46 +28,8 @@ public class NodesReloadSecureSettingsRequestBuilder extends NodesOperationRequestBuilder { - public static final String SECURE_SETTINGS_PASSWORD_FIELD_NAME = "secure_settings_password"; - public NodesReloadSecureSettingsRequestBuilder(ElasticsearchClient client, NodesReloadSecureSettingsAction action) { super(client, action, new NodesReloadSecureSettingsRequest()); } - public NodesReloadSecureSettingsRequestBuilder setSecureStorePassword(SecureString secureStorePassword) { - request.secureStorePassword(secureStorePassword); - return this; - } - - public NodesReloadSecureSettingsRequestBuilder source(BytesReference source, XContentType xContentType) throws IOException { - Objects.requireNonNull(xContentType); - // EMPTY is ok here because we never call namedObject - try (InputStream stream = source.streamInput(); - XContentParser parser = xContentType.xContent().createParser(NamedXContentRegistry.EMPTY, - LoggingDeprecationHandler.INSTANCE, stream)) { - XContentParser.Token token; - token = parser.nextToken(); - if (token != XContentParser.Token.START_OBJECT) { - throw new ElasticsearchParseException("expected an object, but found token [{}]", token); - } - token = parser.nextToken(); - if (token != XContentParser.Token.FIELD_NAME || false == 
SECURE_SETTINGS_PASSWORD_FIELD_NAME.equals(parser.currentName())) { - throw new ElasticsearchParseException("expected a field named [{}], but found [{}]", SECURE_SETTINGS_PASSWORD_FIELD_NAME, - token); - } - token = parser.nextToken(); - if (token != XContentParser.Token.VALUE_STRING) { - throw new ElasticsearchParseException("expected field [{}] to be of type string, but found [{}] instead", - SECURE_SETTINGS_PASSWORD_FIELD_NAME, token); - } - final String password = parser.text(); - setSecureStorePassword(new SecureString(password.toCharArray())); - token = parser.nextToken(); - if (token != XContentParser.Token.END_OBJECT) { - throw new ElasticsearchParseException("expected end of object, but found token [{}]", token); - } - } - return this; - } - } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/TransportNodesReloadSecureSettingsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/TransportNodesReloadSecureSettingsAction.java index 0f44170fa603b..b8a36bac68d61 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/TransportNodesReloadSecureSettingsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/TransportNodesReloadSecureSettingsAction.java @@ -31,7 +31,6 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.settings.KeyStoreWrapper; -import org.elasticsearch.common.settings.SecureString; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.plugins.PluginsService; @@ -82,16 +81,13 @@ protected NodesReloadSecureSettingsResponse.NodeResponse newNodeResponse() { @Override protected NodesReloadSecureSettingsResponse.NodeResponse nodeOperation(NodeRequest nodeReloadRequest) { - final NodesReloadSecureSettingsRequest request = nodeReloadRequest.request; - final SecureString secureSettingsPassword = request.secureSettingsPassword(); try (KeyStoreWrapper keystore = KeyStoreWrapper.load(environment.configFile())) { // reread keystore from config file if (keystore == null) { return new NodesReloadSecureSettingsResponse.NodeResponse(clusterService.localNode(), new IllegalStateException("Keystore is missing")); } - // decrypt the keystore using the password from the request - keystore.decrypt(secureSettingsPassword.getChars()); + keystore.decrypt(new char[0]); // add the keystore to the original node settings object final Settings settingsWithKeystore = Settings.builder() .put(environment.settings(), false) diff --git a/server/src/main/java/org/elasticsearch/ingest/ConfigurationUtils.java b/server/src/main/java/org/elasticsearch/ingest/ConfigurationUtils.java index 2853842c646bc..54d06d116552f 100644 --- a/server/src/main/java/org/elasticsearch/ingest/ConfigurationUtils.java +++ b/server/src/main/java/org/elasticsearch/ingest/ConfigurationUtils.java @@ -284,14 +284,14 @@ public static ElasticsearchException newConfigurationException(String processorT msg = "[" + propertyName + "] " + reason; } ElasticsearchParseException exception = new ElasticsearchParseException(msg); - addHeadersToException(exception, processorType, processorTag, propertyName); + addMetadataToException(exception, processorType, processorTag, propertyName); return exception; } public static ElasticsearchException newConfigurationException(String processorType, String processorTag, String propertyName, Exception cause) { 
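+        // The es.processor_type / es.processor_tag / es.property_name metadata attached below is
+        // what callers now see at the root of the error response (see the updated 20_crud.yml,
+        // 50_on_failure.yml and 90_simulate.yml expectations), replacing the old response headers.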
ElasticsearchException exception = ExceptionsHelper.convertToElastic(cause); - addHeadersToException(exception, processorType, processorTag, propertyName); + addMetadataToException(exception, processorType, processorTag, propertyName); return exception; } @@ -341,16 +341,16 @@ public String execute() { } } - private static void addHeadersToException(ElasticsearchException exception, String processorType, - String processorTag, String propertyName) { + private static void addMetadataToException(ElasticsearchException exception, String processorType, + String processorTag, String propertyName) { if (processorType != null) { - exception.addHeader("processor_type", processorType); + exception.addMetadata("es.processor_type", processorType); } if (processorTag != null) { - exception.addHeader("processor_tag", processorTag); + exception.addMetadata("es.processor_tag", processorTag); } if (propertyName != null) { - exception.addHeader("property_name", propertyName); + exception.addMetadata("es.property_name", propertyName); } } diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestReloadSecureSettingsAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestReloadSecureSettingsAction.java index 0697871ea5d1c..2251615d678fb 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestReloadSecureSettingsAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestReloadSecureSettingsAction.java @@ -59,7 +59,6 @@ public RestChannelConsumer prepareRequest(RestRequest request, NodeClient client .cluster() .prepareReloadSecureSettings() .setTimeout(request.param("timeout")) - .source(request.requiredContent(), request.getXContentType()) .setNodesIds(nodesIds); final NodesReloadSecureSettingsRequest nodesRequest = nodesRequestBuilder.request(); return channel -> nodesRequestBuilder @@ -68,12 +67,12 @@ public RestChannelConsumer prepareRequest(RestRequest request, NodeClient client public RestResponse buildResponse(NodesReloadSecureSettingsResponse response, XContentBuilder builder) throws Exception { builder.startObject(); - RestActions.buildNodesHeader(builder, channel.request(), response); - builder.field("cluster_name", response.getClusterName().value()); - response.toXContent(builder, channel.request()); + { + RestActions.buildNodesHeader(builder, channel.request(), response); + builder.field("cluster_name", response.getClusterName().value()); + response.toXContent(builder, channel.request()); + } builder.endObject(); - // clear password for the original request - nodesRequest.secureSettingsPassword().close(); return new BytesRestResponse(RestStatus.OK, builder); } }); diff --git a/server/src/test/java/org/elasticsearch/action/admin/ReloadSecureSettingsIT.java b/server/src/test/java/org/elasticsearch/action/admin/ReloadSecureSettingsIT.java index 7952758240544..3f9e258ffec1c 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/ReloadSecureSettingsIT.java +++ b/server/src/test/java/org/elasticsearch/action/admin/ReloadSecureSettingsIT.java @@ -20,11 +20,9 @@ package org.elasticsearch.action.admin; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.admin.cluster.node.reload.NodesReloadSecureSettingsResponse; import org.elasticsearch.common.settings.KeyStoreWrapper; import org.elasticsearch.common.settings.SecureSettings; -import org.elasticsearch.common.settings.SecureString; import 
org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.plugins.Plugin; @@ -44,11 +42,11 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicReference; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.nullValue; -import static org.hamcrest.Matchers.instanceOf; -import static org.hamcrest.Matchers.containsString; public class ReloadSecureSettingsIT extends ESIntegTestCase { @@ -62,7 +60,7 @@ public void testMissingKeystoreFile() throws Exception { Files.deleteIfExists(KeyStoreWrapper.keystorePath(environment.configFile())); final int initialReloadCount = mockReloadablePlugin.getReloadCount(); final CountDownLatch latch = new CountDownLatch(1); - client().admin().cluster().prepareReloadSecureSettings().setSecureStorePassword(new SecureString(new char[0])).execute( + client().admin().cluster().prepareReloadSecureSettings().execute( new ActionListener() { @Override public void onResponse(NodesReloadSecureSettingsResponse nodesReloadResponse) { @@ -96,44 +94,6 @@ public void onFailure(Exception e) { assertThat(mockReloadablePlugin.getReloadCount(), equalTo(initialReloadCount)); } - public void testNullKeystorePassword() throws Exception { - final PluginsService pluginsService = internalCluster().getInstance(PluginsService.class); - final MockReloadablePlugin mockReloadablePlugin = pluginsService.filterPlugins(MockReloadablePlugin.class) - .stream().findFirst().get(); - final AtomicReference reloadSettingsError = new AtomicReference<>(); - final int initialReloadCount = mockReloadablePlugin.getReloadCount(); - final CountDownLatch latch = new CountDownLatch(1); - client().admin().cluster().prepareReloadSecureSettings().execute( - new ActionListener() { - @Override - public void onResponse(NodesReloadSecureSettingsResponse nodesReloadResponse) { - try { - reloadSettingsError.set(new AssertionError("Null keystore password should fail")); - } finally { - latch.countDown(); - } - } - - @Override - public void onFailure(Exception e) { - try { - assertThat(e, instanceOf(ActionRequestValidationException.class)); - assertThat(e.getMessage(), containsString("secure settings password cannot be null")); - } catch (final AssertionError ae) { - reloadSettingsError.set(ae); - } finally { - latch.countDown(); - } - } - }); - latch.await(); - if (reloadSettingsError.get() != null) { - throw reloadSettingsError.get(); - } - // in the null password case no reload should be triggered - assertThat(mockReloadablePlugin.getReloadCount(), equalTo(initialReloadCount)); - } - public void testInvalidKeystoreFile() throws Exception { final PluginsService pluginsService = internalCluster().getInstance(PluginsService.class); final MockReloadablePlugin mockReloadablePlugin = pluginsService.filterPlugins(MockReloadablePlugin.class) @@ -149,7 +109,7 @@ public void testInvalidKeystoreFile() throws Exception { Files.copy(keystore, KeyStoreWrapper.keystorePath(environment.configFile()), StandardCopyOption.REPLACE_EXISTING); } final CountDownLatch latch = new CountDownLatch(1); - client().admin().cluster().prepareReloadSecureSettings().setSecureStorePassword(new SecureString(new char[0])).execute( + client().admin().cluster().prepareReloadSecureSettings().execute( new ActionListener() { @Override public void onResponse(NodesReloadSecureSettingsResponse 
nodesReloadResponse) { @@ -181,52 +141,6 @@ public void onFailure(Exception e) { assertThat(mockReloadablePlugin.getReloadCount(), equalTo(initialReloadCount)); } - public void testWrongKeystorePassword() throws Exception { - final PluginsService pluginsService = internalCluster().getInstance(PluginsService.class); - final MockReloadablePlugin mockReloadablePlugin = pluginsService.filterPlugins(MockReloadablePlugin.class) - .stream().findFirst().get(); - final Environment environment = internalCluster().getInstance(Environment.class); - final AtomicReference reloadSettingsError = new AtomicReference<>(); - final int initialReloadCount = mockReloadablePlugin.getReloadCount(); - // "some" keystore should be present in this case - writeEmptyKeystore(environment, new char[0]); - final CountDownLatch latch = new CountDownLatch(1); - client().admin() - .cluster() - .prepareReloadSecureSettings() - .setSecureStorePassword(new SecureString(new char[] { 'W', 'r', 'o', 'n', 'g' })) - .execute(new ActionListener() { - @Override - public void onResponse(NodesReloadSecureSettingsResponse nodesReloadResponse) { - try { - assertThat(nodesReloadResponse, notNullValue()); - final Map nodesMap = nodesReloadResponse.getNodesMap(); - assertThat(nodesMap.size(), equalTo(cluster().size())); - for (final NodesReloadSecureSettingsResponse.NodeResponse nodeResponse : nodesReloadResponse.getNodes()) { - assertThat(nodeResponse.reloadException(), notNullValue()); - assertThat(nodeResponse.reloadException(), instanceOf(SecurityException.class)); - } - } catch (final AssertionError e) { - reloadSettingsError.set(e); - } finally { - latch.countDown(); - } - } - - @Override - public void onFailure(Exception e) { - reloadSettingsError.set(new AssertionError("Nodes request failed", e)); - latch.countDown(); - } - }); - latch.await(); - if (reloadSettingsError.get() != null) { - throw reloadSettingsError.get(); - } - // in the wrong password case no reload should be triggered - assertThat(mockReloadablePlugin.getReloadCount(), equalTo(initialReloadCount)); - } - public void testMisbehavingPlugin() throws Exception { final Environment environment = internalCluster().getInstance(Environment.class); final PluginsService pluginsService = internalCluster().getInstance(PluginsService.class); @@ -247,7 +161,7 @@ public void testMisbehavingPlugin() throws Exception { .get(Settings.builder().put(environment.settings()).setSecureSettings(secureSettings).build()) .toString(); final CountDownLatch latch = new CountDownLatch(1); - client().admin().cluster().prepareReloadSecureSettings().setSecureStorePassword(new SecureString(new char[0])).execute( + client().admin().cluster().prepareReloadSecureSettings().execute( new ActionListener() { @Override public void onResponse(NodesReloadSecureSettingsResponse nodesReloadResponse) { @@ -314,7 +228,7 @@ protected Collection> nodePlugins() { private void successfulReloadCall() throws InterruptedException { final AtomicReference reloadSettingsError = new AtomicReference<>(); final CountDownLatch latch = new CountDownLatch(1); - client().admin().cluster().prepareReloadSecureSettings().setSecureStorePassword(new SecureString(new char[0])).execute( + client().admin().cluster().prepareReloadSecureSettings().execute( new ActionListener() { @Override public void onResponse(NodesReloadSecureSettingsResponse nodesReloadResponse) { diff --git a/server/src/test/java/org/elasticsearch/cluster/ack/AckIT.java b/server/src/test/java/org/elasticsearch/cluster/ack/AckIT.java index 2cd8a2c27c714..df97854cc35b0 
100644 --- a/server/src/test/java/org/elasticsearch/cluster/ack/AckIT.java +++ b/server/src/test/java/org/elasticsearch/cluster/ack/AckIT.java @@ -19,6 +19,7 @@ package org.elasticsearch.cluster.ack; +import org.apache.lucene.util.LuceneTestCase.AwaitsFix; import org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteResponse; import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; @@ -50,6 +51,7 @@ import static org.hamcrest.Matchers.notNullValue; @ClusterScope(minNumDataNodes = 2) +@AwaitsFix(bugUrl="https://github.com/elastic/elasticsearch/issues/32767") public class AckIT extends ESIntegTestCase { @Override diff --git a/server/src/test/java/org/elasticsearch/index/shard/GlobalCheckpointListenersTests.java b/server/src/test/java/org/elasticsearch/index/shard/GlobalCheckpointListenersTests.java index d9240602d8519..43b16c6ecc78f 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/GlobalCheckpointListenersTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/GlobalCheckpointListenersTests.java @@ -341,7 +341,7 @@ public void testNotificationUsesExecutor() { globalCheckpointListeners.add(NO_OPS_PERFORMED, (g, e) -> {}); } globalCheckpointListeners.globalCheckpointUpdated(randomLongBetween(NO_OPS_PERFORMED, Long.MAX_VALUE)); - assertThat(count.get(), equalTo(1)); + assertThat(count.get(), equalTo(numberOfListeners == 0 ? 0 : 1)); } public void testConcurrency() throws BrokenBarrierException, InterruptedException { diff --git a/server/src/test/java/org/elasticsearch/ingest/ConfigurationUtilsTests.java b/server/src/test/java/org/elasticsearch/ingest/ConfigurationUtilsTests.java index af863410f9f35..61afd9ce2a473 100644 --- a/server/src/test/java/org/elasticsearch/ingest/ConfigurationUtilsTests.java +++ b/server/src/test/java/org/elasticsearch/ingest/ConfigurationUtilsTests.java @@ -131,9 +131,9 @@ public void testReadProcessors() throws Exception { ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> ConfigurationUtils.readProcessorConfigs(config, registry)); assertThat(e.getMessage(), equalTo("No processor type exists with name [unknown_processor]")); - assertThat(e.getHeader("processor_tag"), equalTo(Collections.singletonList("my_unknown"))); - assertThat(e.getHeader("processor_type"), equalTo(Collections.singletonList("unknown_processor"))); - assertThat(e.getHeader("property_name"), is(nullValue())); + assertThat(e.getMetadata("es.processor_tag"), equalTo(Collections.singletonList("my_unknown"))); + assertThat(e.getMetadata("es.processor_type"), equalTo(Collections.singletonList("unknown_processor"))); + assertThat(e.getMetadata("es.property_name"), is(nullValue())); List> config2 = new ArrayList<>(); unknownTaggedConfig = new HashMap<>(); @@ -144,17 +144,17 @@ public void testReadProcessors() throws Exception { config2.add(Collections.singletonMap("second_unknown_processor", secondUnknonwTaggedConfig)); e = expectThrows(ElasticsearchParseException.class, () -> ConfigurationUtils.readProcessorConfigs(config2, registry)); assertThat(e.getMessage(), equalTo("No processor type exists with name [unknown_processor]")); - assertThat(e.getHeader("processor_tag"), equalTo(Collections.singletonList("my_unknown"))); - assertThat(e.getHeader("processor_type"), equalTo(Collections.singletonList("unknown_processor"))); - assertThat(e.getHeader("property_name"), is(nullValue())); + assertThat(e.getMetadata("es.processor_tag"),
equalTo(Collections.singletonList("my_unknown"))); + assertThat(e.getMetadata("es.processor_type"), equalTo(Collections.singletonList("unknown_processor"))); + assertThat(e.getMetadata("es.property_name"), is(nullValue())); assertThat(e.getSuppressed().length, equalTo(1)); assertThat(e.getSuppressed()[0], instanceOf(ElasticsearchParseException.class)); ElasticsearchParseException e2 = (ElasticsearchParseException) e.getSuppressed()[0]; assertThat(e2.getMessage(), equalTo("No processor type exists with name [second_unknown_processor]")); - assertThat(e2.getHeader("processor_tag"), equalTo(Collections.singletonList("my_second_unknown"))); - assertThat(e2.getHeader("processor_type"), equalTo(Collections.singletonList("second_unknown_processor"))); - assertThat(e2.getHeader("property_name"), is(nullValue())); + assertThat(e2.getMetadata("es.processor_tag"), equalTo(Collections.singletonList("my_second_unknown"))); + assertThat(e2.getMetadata("es.processor_type"), equalTo(Collections.singletonList("second_unknown_processor"))); + assertThat(e2.getMetadata("es.property_name"), is(nullValue())); } public void testReadProcessorFromObjectOrMap() throws Exception { diff --git a/server/src/test/java/org/elasticsearch/ingest/PipelineStoreTests.java b/server/src/test/java/org/elasticsearch/ingest/PipelineStoreTests.java index 250bb5059cf58..d0ce465fc9ef8 100644 --- a/server/src/test/java/org/elasticsearch/ingest/PipelineStoreTests.java +++ b/server/src/test/java/org/elasticsearch/ingest/PipelineStoreTests.java @@ -356,8 +356,8 @@ public void testValidate() throws Exception { ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> store.validatePipeline(ingestInfos, putRequest)); assertEquals("Processor type [remove] is not installed on node [" + node2 + "]", e.getMessage()); - assertEquals("remove", e.getHeader("processor_type").get(0)); - assertEquals("tag2", e.getHeader("processor_tag").get(0)); + assertEquals("remove", e.getMetadata("es.processor_type").get(0)); + assertEquals("tag2", e.getMetadata("es.processor_tag").get(0)); ingestInfos.put(node2, new IngestInfo(Arrays.asList(new ProcessorInfo("set"), new ProcessorInfo("remove")))); store.validatePipeline(ingestInfos, putRequest); diff --git a/server/src/test/java/org/elasticsearch/search/scroll/DuelScrollIT.java b/server/src/test/java/org/elasticsearch/search/scroll/DuelScrollIT.java index 31fcfa7155cc0..4005f1218a92f 100644 --- a/server/src/test/java/org/elasticsearch/search/scroll/DuelScrollIT.java +++ b/server/src/test/java/org/elasticsearch/search/scroll/DuelScrollIT.java @@ -21,6 +21,7 @@ import com.carrotsearch.hppc.IntHashSet; import com.carrotsearch.randomizedtesting.generators.RandomPicks; + import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchType; @@ -198,6 +199,8 @@ private int createIndex(boolean singleShard) throws Exception { } // no replicas, as they might be ordered differently settings.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0); + // we need to control refreshes as they might take different merges into account + settings.put("index.refresh_interval", -1); assertAcked(prepareCreate("test").setSettings(settings.build()).get()); final int numDocs = randomIntBetween(10, 200); diff --git a/x-pack/docs/en/security/configuring-es.asciidoc b/x-pack/docs/en/security/configuring-es.asciidoc index 5e8f1adbc7aa8..53f36afc73481 100644 --- a/x-pack/docs/en/security/configuring-es.asciidoc +++ 
b/x-pack/docs/en/security/configuring-es.asciidoc @@ -9,7 +9,7 @@ password-protect your data as well as implement more advanced security measures such as encrypting communications, role-based access control, IP filtering, and auditing. For more information, see -{xpack-ref}/xpack-security.html[Securing the Elastic Stack]. +{xpack-ref}/elasticsearch-security.html[Securing the Elastic Stack]. To use {security} in {es}: diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/token/CreateTokenRequest.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/token/CreateTokenRequest.java index 5956e1a661345..fdb46711c0c59 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/token/CreateTokenRequest.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/token/CreateTokenRequest.java @@ -15,7 +15,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.settings.SecureString; -import org.elasticsearch.xpack.core.security.authc.support.CharArrays; +import org.elasticsearch.common.CharArrays; import java.io.IOException; import java.util.Arrays; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/user/ChangePasswordRequest.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/user/ChangePasswordRequest.java index f84b133d984b6..b78b81c060080 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/user/ChangePasswordRequest.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/user/ChangePasswordRequest.java @@ -12,7 +12,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.xpack.core.security.authc.support.CharArrays; +import org.elasticsearch.common.CharArrays; import java.io.IOException; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/user/PutUserRequest.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/user/PutUserRequest.java index f37072b9cf0fc..e704259396a34 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/user/PutUserRequest.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/action/user/PutUserRequest.java @@ -8,12 +8,12 @@ import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.support.WriteRequest; +import org.elasticsearch.common.CharArrays; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.xpack.core.security.authc.support.CharArrays; import java.io.IOException; import java.util.Map; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/BCrypt.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/BCrypt.java index ceb93dc4c853c..a93476bbdc8da 100644 --- 
a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/BCrypt.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/BCrypt.java @@ -14,6 +14,7 @@ // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +import org.elasticsearch.common.CharArrays; import org.elasticsearch.common.settings.SecureString; import java.security.SecureRandom; @@ -54,7 +55,7 @@ * String stronger_salt = BCrypt.gensalt(12)
 * </code>
 * <p>
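The javadoc continuing below notes that the work grows as 2**log_rounds, so each increment doubles the cost; a minimal, standalone illustration of that curve (plain arithmetic only, not code from this change):

    public class BCryptCostSketch {
        public static void main(String[] args) {
            // Each +1 to log_rounds doubles BCrypt's work: 2**log_rounds expansion rounds.
            // The range quoted in the javadoc is 4 to 30, default 10.
            for (int logRounds = 4; logRounds <= 12; logRounds += 2) {
                long rounds = 1L << logRounds; // 2**log_rounds
                System.out.println("gensalt(" + logRounds + ") -> " + rounds + " rounds");
            }
        }
    }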
- * The amount of work increases exponentially (2**log_rounds), so + * The amount of work increases exponentially (2**log_rounds), so * each increment is twice as much work. The default log_rounds is * 10, and the valid range is 4 to 30. * @@ -689,7 +690,11 @@ public static String hashpw(SecureString password, String salt) { // the next lines are the SecureString replacement for the above commented-out section if (minor >= 'a') { - try (SecureString secureString = new SecureString(CharArrays.concat(password.getChars(), "\000".toCharArray()))) { + final char[] suffix = "\000".toCharArray(); + final char[] result = new char[password.length() + suffix.length]; + System.arraycopy(password.getChars(), 0, result, 0, password.length()); + System.arraycopy(suffix, 0, result, password.length(), suffix.length); + try (SecureString secureString = new SecureString(result)) { passwordb = CharArrays.toUtf8Bytes(secureString.getChars()); } } else { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/CharArrays.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/CharArrays.java deleted file mode 100644 index 26df90c31a2de..0000000000000 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/CharArrays.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License; - * you may not use this file except in compliance with the Elastic License. - */ -package org.elasticsearch.xpack.core.security.authc.support; - -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; - -/** - * Helper class similar to Arrays to handle conversions for Char arrays - */ -public class CharArrays { - - public static char[] utf8BytesToChars(byte[] utf8Bytes) { - ByteBuffer byteBuffer = ByteBuffer.wrap(utf8Bytes); - CharBuffer charBuffer = StandardCharsets.UTF_8.decode(byteBuffer); - char[] chars = Arrays.copyOfRange(charBuffer.array(), charBuffer.position(), charBuffer.limit()); - byteBuffer.clear(); - charBuffer.clear(); - return chars; - } - - /** - * Like String.indexOf for for an array of chars - */ - static int indexOf(char[] array, char ch) { - for (int i = 0; (i < array.length); i++) { - if (array[i] == ch) { - return i; - } - } - return -1; - } - - /** - * Converts the provided char[] to a UTF-8 byte[]. The provided char[] is not modified by this - * method, so the caller needs to take care of clearing the value if it is sensitive. 
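That clearing contract is easy to miss, so here is a short sketch of how a caller of the relocated org.elasticsearch.common.CharArrays is expected to honour it; the password value and wrapper class are invented for illustration:

    import java.util.Arrays;
    import org.elasticsearch.common.CharArrays;

    public class ToUtf8BytesSketch {
        public static void main(String[] args) {
            char[] password = "s3cret".toCharArray();       // sensitive value (illustrative only)
            byte[] utf8 = CharArrays.toUtf8Bytes(password); // copies the chars; input stays intact
            try {
                // ... hand utf8 to hashing or transport code ...
            } finally {
                Arrays.fill(password, (char) 0); // the method does not clear its input
                Arrays.fill(utf8, (byte) 0);     // clear the UTF-8 copy once used
            }
        }
    }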
- */ - public static byte[] toUtf8Bytes(char[] chars) { - CharBuffer charBuffer = CharBuffer.wrap(chars); - ByteBuffer byteBuffer = StandardCharsets.UTF_8.encode(charBuffer); - byte[] bytes = Arrays.copyOfRange(byteBuffer.array(), byteBuffer.position(), byteBuffer.limit()); - Arrays.fill(byteBuffer.array(), (byte) 0); // clear sensitive data - return bytes; - } - - public static boolean charsBeginsWith(String prefix, char[] chars) { - if (chars == null || prefix == null) { - return false; - } - - if (prefix.length() > chars.length) { - return false; - } - - for (int i = 0; i < prefix.length(); i++) { - if (chars[i] != prefix.charAt(i)) { - return false; - } - } - - return true; - } - - public static boolean constantTimeEquals(char[] a, char[] b) { - if (a.length != b.length) { - return false; - } - - int equals = 0; - for (int i = 0; i < a.length; i++) { - equals |= a[i] ^ b[i]; - } - - return equals == 0; - } - - public static boolean constantTimeEquals(String a, String b) { - if (a.length() != b.length()) { - return false; - } - - int equals = 0; - for (int i = 0; i < a.length(); i++) { - equals |= a.charAt(i) ^ b.charAt(i); - } - - return equals == 0; - } - - public static char[] concat(char[] a, char[] b) { - final char[] result = new char[a.length + b.length]; - System.arraycopy(a, 0, result, 0, a.length); - System.arraycopy(b, 0, result, a.length, b.length); - return result; - } -} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/Hasher.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/Hasher.java index d12547bd90645..492622b2c519c 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/Hasher.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/Hasher.java @@ -6,6 +6,7 @@ package org.elasticsearch.xpack.core.security.authc.support; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.CharArrays; import org.elasticsearch.common.hash.MessageDigests; import org.elasticsearch.common.settings.SecureString; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/UsernamePasswordToken.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/UsernamePasswordToken.java index d8e58c29d237b..1349303600884 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/UsernamePasswordToken.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authc/support/UsernamePasswordToken.java @@ -5,6 +5,7 @@ */ package org.elasticsearch.xpack.core.security.authc.support; +import org.elasticsearch.common.CharArrays; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.SecureString; import org.elasticsearch.common.util.concurrent.ThreadContext; @@ -107,7 +108,7 @@ private static UsernamePasswordToken extractToken(String headerValue) { throw authenticationError("invalid basic authentication header encoding", e); } - int i = CharArrays.indexOf(userpasswd, ':'); + int i = indexOfColon(userpasswd); if (i < 0) { throw authenticationError("invalid basic authentication header value"); } @@ -121,4 +122,15 @@ public static void putTokenHeader(ThreadContext context, UsernamePasswordToken t context.putHeader(BASIC_AUTH_HEADER, basicAuthHeaderValue(token.username, token.password)); } + /** + * Like String.indexOf but for an array of chars + */ +
private static int indexOfColon(char[] array) { + for (int i = 0; (i < array.length); i++) { + if (array[i] == ':') { + return i; + } + } + return -1; + } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ssl/PemUtils.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ssl/PemUtils.java index d959c017e0a35..a3814a76a3e6e 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ssl/PemUtils.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ssl/PemUtils.java @@ -7,7 +7,7 @@ package org.elasticsearch.xpack.core.ssl; import org.elasticsearch.common.hash.MessageDigests; -import org.elasticsearch.xpack.core.security.authc.support.CharArrays; +import org.elasticsearch.common.CharArrays; import java.io.BufferedReader; import java.io.IOException; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/crypto/CryptoService.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/crypto/CryptoService.java index b1f3a32769ec9..a25e79ffdf66f 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/crypto/CryptoService.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/crypto/CryptoService.java @@ -13,7 +13,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.xpack.core.watcher.WatcherField; import org.elasticsearch.xpack.core.security.SecurityField; -import org.elasticsearch.xpack.core.security.authc.support.CharArrays; +import org.elasticsearch.common.CharArrays; import javax.crypto.BadPaddingException; import javax.crypto.Cipher; diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedConfigTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedConfigTests.java index ffc13655d229c..3030449abd1b6 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedConfigTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/datafeed/DatafeedConfigTests.java @@ -100,7 +100,7 @@ public static DatafeedConfig createRandomizedDatafeedConfig(String jobId, long b if (aggHistogramInterval == null) { builder.setFrequency(TimeValue.timeValueSeconds(randomIntBetween(1, 1_000_000))); } else { - builder.setFrequency(TimeValue.timeValueMillis(randomIntBetween(1, 5) * aggHistogramInterval)); + builder.setFrequency(TimeValue.timeValueSeconds(randomIntBetween(1, 5) * aggHistogramInterval)); } } if (randomBoolean()) { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetReaderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetReaderTests.java index 38857e2170de4..dca2f37f3f224 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetReaderTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetReaderTests.java @@ -80,9 +80,8 @@ public void cleanDirectory() throws Exception { bitsetFilterCache.close(); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/32457") public void testSearch() throws Exception { - IndexWriter iw = new IndexWriter(directory, newIndexWriterConfig()); + IndexWriter iw = new IndexWriter(directory, newIndexWriterConfig().setMergePolicy(newLogMergePolicy(random()))); Document
document = new Document(); document.add(new StringField("field", "value1", Field.Store.NO)); diff --git a/x-pack/plugin/ml/log-structure-finder/build.gradle b/x-pack/plugin/ml/log-structure-finder/build.gradle new file mode 100644 index 0000000000000..9048a1c46860c --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/build.gradle @@ -0,0 +1,36 @@ +import org.elasticsearch.gradle.precommit.PrecommitTasks + +apply plugin: 'elasticsearch.build' + +archivesBaseName = 'x-pack-log-structure-finder' + +description = 'Common code for reverse engineering log structure' + +dependencies { + compile "org.elasticsearch:elasticsearch-core:${version}" + compile "org.elasticsearch:elasticsearch-x-content:${version}" + compile project(':libs:grok') + compile "com.ibm.icu:icu4j:${versions.icu4j}" + compile "net.sf.supercsv:super-csv:${versions.supercsv}" + + testCompile "org.elasticsearch.test:framework:${version}" +} + +configurations { + testArtifacts.extendsFrom testRuntime +} +task testJar(type: Jar) { + appendix 'test' + from sourceSets.test.output +} +artifacts { + // normal es plugins do not publish the jar but we need to since users need it for Transport Clients and extensions + archives jar + testArtifacts testJar +} + +forbiddenApisMain { + // log-structure-finder does not depend on server, so cannot forbid server methods + signaturesURLs = [PrecommitTasks.getResource('/forbidden/jdk-signatures.txt')] +} + diff --git a/x-pack/plugin/ml/log-structure-finder/licenses/icu4j-62.1.jar.sha1 b/x-pack/plugin/ml/log-structure-finder/licenses/icu4j-62.1.jar.sha1 new file mode 100644 index 0000000000000..c24c69cf4b90f --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/licenses/icu4j-62.1.jar.sha1 @@ -0,0 +1 @@ +7a4d00d5ec5febd252a6182e8b6e87a0a9821f81 \ No newline at end of file diff --git a/x-pack/plugin/ml/log-structure-finder/licenses/icu4j-LICENSE.txt b/x-pack/plugin/ml/log-structure-finder/licenses/icu4j-LICENSE.txt new file mode 100644 index 0000000000000..e76faec4ad20f --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/licenses/icu4j-LICENSE.txt @@ -0,0 +1,33 @@ +ICU License - ICU 1.8.1 and later + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1995-2012 International Business Machines Corporation and others + +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, and/or sell copies of the +Software, and to permit persons to whom the Software is furnished to do so, +provided that the above copyright notice(s) and this permission notice appear +in all copies of the Software and that both the above copyright notice(s) and +this permission notice appear in supporting documentation. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE +LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR +ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ +Except as contained in this notice, the name of a copyright holder shall not +be used in advertising or otherwise to promote the sale, use or other +dealings in this Software without prior written authorization of the +copyright holder. + +All trademarks and registered trademarks mentioned herein are the property of +their respective owners. diff --git a/x-pack/plugin/ml/log-structure-finder/licenses/icu4j-NOTICE.txt b/x-pack/plugin/ml/log-structure-finder/licenses/icu4j-NOTICE.txt new file mode 100644 index 0000000000000..47eeab14f2ef6 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/licenses/icu4j-NOTICE.txt @@ -0,0 +1,3 @@ +ICU4J, (under lucene/analysis/icu) is licensed under an MIT style license +(modules/analysis/icu/lib/icu4j-LICENSE-BSD_LIKE.txt) and Copyright (c) 1995-2012 +International Business Machines Corporation and others \ No newline at end of file diff --git a/x-pack/plugin/ml/log-structure-finder/licenses/super-csv-2.4.0.jar.sha1 b/x-pack/plugin/ml/log-structure-finder/licenses/super-csv-2.4.0.jar.sha1 new file mode 100644 index 0000000000000..a0b402133090d --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/licenses/super-csv-2.4.0.jar.sha1 @@ -0,0 +1 @@ +017f8708c929029dde48bc298deaf3c7ae2452d3 \ No newline at end of file diff --git a/x-pack/plugin/ml/log-structure-finder/licenses/super-csv-LICENSE.txt b/x-pack/plugin/ml/log-structure-finder/licenses/super-csv-LICENSE.txt new file mode 100644 index 0000000000000..9e0ad072b2527 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/licenses/super-csv-LICENSE.txt @@ -0,0 +1,203 @@ +/* + * Apache License + * Version 2.0, January 2004 + * http://www.apache.org/licenses/ + * + * TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + * + * 1. Definitions. + * + * "License" shall mean the terms and conditions for use, reproduction, + * and distribution as defined by Sections 1 through 9 of this document. + * + * "Licensor" shall mean the copyright owner or entity authorized by + * the copyright owner that is granting the License. + * + * "Legal Entity" shall mean the union of the acting entity and all + * other entities that control, are controlled by, or are under common + * control with that entity. For the purposes of this definition, + * "control" means (i) the power, direct or indirect, to cause the + * direction or management of such entity, whether by contract or + * otherwise, or (ii) ownership of fifty percent (50%) or more of the + * outstanding shares, or (iii) beneficial ownership of such entity. + * + * "You" (or "Your") shall mean an individual or Legal Entity + * exercising permissions granted by this License. + * + * "Source" form shall mean the preferred form for making modifications, + * including but not limited to software source code, documentation + * source, and configuration files. + * + * "Object" form shall mean any form resulting from mechanical + * transformation or translation of a Source form, including but + * not limited to compiled object code, generated documentation, + * and conversions to other media types. + * + * "Work" shall mean the work of authorship, whether in Source or + * Object form, made available under the License, as indicated by a + * copyright notice that is included in or attached to the work + * (an example is provided in the Appendix below). 
+ * + * "Derivative Works" shall mean any work, whether in Source or Object + * form, that is based on (or derived from) the Work and for which the + * editorial revisions, annotations, elaborations, or other modifications + * represent, as a whole, an original work of authorship. For the purposes + * of this License, Derivative Works shall not include works that remain + * separable from, or merely link (or bind by name) to the interfaces of, + * the Work and Derivative Works thereof. + * + * "Contribution" shall mean any work of authorship, including + * the original version of the Work and any modifications or additions + * to that Work or Derivative Works thereof, that is intentionally + * submitted to Licensor for inclusion in the Work by the copyright owner + * or by an individual or Legal Entity authorized to submit on behalf of + * the copyright owner. For the purposes of this definition, "submitted" + * means any form of electronic, verbal, or written communication sent + * to the Licensor or its representatives, including but not limited to + * communication on electronic mailing lists, source code control systems, + * and issue tracking systems that are managed by, or on behalf of, the + * Licensor for the purpose of discussing and improving the Work, but + * excluding communication that is conspicuously marked or otherwise + * designated in writing by the copyright owner as "Not a Contribution." + * + * "Contributor" shall mean Licensor and any individual or Legal Entity + * on behalf of whom a Contribution has been received by Licensor and + * subsequently incorporated within the Work. + * + * 2. Grant of Copyright License. Subject to the terms and conditions of + * this License, each Contributor hereby grants to You a perpetual, + * worldwide, non-exclusive, no-charge, royalty-free, irrevocable + * copyright license to reproduce, prepare Derivative Works of, + * publicly display, publicly perform, sublicense, and distribute the + * Work and such Derivative Works in Source or Object form. + * + * 3. Grant of Patent License. Subject to the terms and conditions of + * this License, each Contributor hereby grants to You a perpetual, + * worldwide, non-exclusive, no-charge, royalty-free, irrevocable + * (except as stated in this section) patent license to make, have made, + * use, offer to sell, sell, import, and otherwise transfer the Work, + * where such license applies only to those patent claims licensable + * by such Contributor that are necessarily infringed by their + * Contribution(s) alone or by combination of their Contribution(s) + * with the Work to which such Contribution(s) was submitted. If You + * institute patent litigation against any entity (including a + * cross-claim or counterclaim in a lawsuit) alleging that the Work + * or a Contribution incorporated within the Work constitutes direct + * or contributory patent infringement, then any patent licenses + * granted to You under this License for that Work shall terminate + * as of the date such litigation is filed. + * + * 4. Redistribution. 
You may reproduce and distribute copies of the + * Work or Derivative Works thereof in any medium, with or without + * modifications, and in Source or Object form, provided that You + * meet the following conditions: + * + * (a) You must give any other recipients of the Work or + * Derivative Works a copy of this License; and + * + * (b) You must cause any modified files to carry prominent notices + * stating that You changed the files; and + * + * (c) You must retain, in the Source form of any Derivative Works + * that You distribute, all copyright, patent, trademark, and + * attribution notices from the Source form of the Work, + * excluding those notices that do not pertain to any part of + * the Derivative Works; and + * + * (d) If the Work includes a "NOTICE" text file as part of its + * distribution, then any Derivative Works that You distribute must + * include a readable copy of the attribution notices contained + * within such NOTICE file, excluding those notices that do not + * pertain to any part of the Derivative Works, in at least one + * of the following places: within a NOTICE text file distributed + * as part of the Derivative Works; within the Source form or + * documentation, if provided along with the Derivative Works; or, + * within a display generated by the Derivative Works, if and + * wherever such third-party notices normally appear. The contents + * of the NOTICE file are for informational purposes only and + * do not modify the License. You may add Your own attribution + * notices within Derivative Works that You distribute, alongside + * or as an addendum to the NOTICE text from the Work, provided + * that such additional attribution notices cannot be construed + * as modifying the License. + * + * You may add Your own copyright statement to Your modifications and + * may provide additional or different license terms and conditions + * for use, reproduction, or distribution of Your modifications, or + * for any such Derivative Works as a whole, provided Your use, + * reproduction, and distribution of the Work otherwise complies with + * the conditions stated in this License. + * + * 5. Submission of Contributions. Unless You explicitly state otherwise, + * any Contribution intentionally submitted for inclusion in the Work + * by You to the Licensor shall be under the terms and conditions of + * this License, without any additional terms or conditions. + * Notwithstanding the above, nothing herein shall supersede or modify + * the terms of any separate license agreement you may have executed + * with Licensor regarding such Contributions. + * + * 6. Trademarks. This License does not grant permission to use the trade + * names, trademarks, service marks, or product names of the Licensor, + * except as required for reasonable and customary use in describing the + * origin of the Work and reproducing the content of the NOTICE file. + * + * 7. Disclaimer of Warranty. Unless required by applicable law or + * agreed to in writing, Licensor provides the Work (and each + * Contributor provides its Contributions) on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied, including, without limitation, any warranties or conditions + * of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + * PARTICULAR PURPOSE. You are solely responsible for determining the + * appropriateness of using or redistributing the Work and assume any + * risks associated with Your exercise of permissions under this License. + * + * 8. 
Limitation of Liability. In no event and under no legal theory, + * whether in tort (including negligence), contract, or otherwise, + * unless required by applicable law (such as deliberate and grossly + * negligent acts) or agreed to in writing, shall any Contributor be + * liable to You for damages, including any direct, indirect, special, + * incidental, or consequential damages of any character arising as a + * result of this License or out of the use or inability to use the + * Work (including but not limited to damages for loss of goodwill, + * work stoppage, computer failure or malfunction, or any and all + * other commercial damages or losses), even if such Contributor + * has been advised of the possibility of such damages. + * + * 9. Accepting Warranty or Additional Liability. While redistributing + * the Work or Derivative Works thereof, You may choose to offer, + * and charge a fee for, acceptance of support, warranty, indemnity, + * or other liability obligations and/or rights consistent with this + * License. However, in accepting such obligations, You may act only + * on Your own behalf and on Your sole responsibility, not on behalf + * of any other Contributor, and only if You agree to indemnify, + * defend, and hold each Contributor harmless for any liability + * incurred by, or claims asserted against, such Contributor by reason + * of your accepting any such warranty or additional liability. + * + * END OF TERMS AND CONDITIONS + * + * APPENDIX: How to apply the Apache License to your work. + * + * To apply the Apache License to your work, attach the following + * boilerplate notice, with the fields enclosed by brackets "[]" + * replaced with your own identifying information. (Don't include + * the brackets!) The text should be enclosed in the appropriate + * comment syntax for the file format. We also recommend that a + * file or class name and description of purpose be included on the + * same "printed page" as the copyright notice for easier + * identification within third-party archives. + * + * Copyright 2007 Kasper B. Graversen + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ diff --git a/x-pack/plugin/ml/log-structure-finder/licenses/super-csv-NOTICE.txt b/x-pack/plugin/ml/log-structure-finder/licenses/super-csv-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/CsvLogStructureFinderFactory.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/CsvLogStructureFinderFactory.java new file mode 100644 index 0000000000000..cb9e6537252cd --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/CsvLogStructureFinderFactory.java @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.supercsv.prefs.CsvPreference; + +import java.io.IOException; +import java.util.List; + +public class CsvLogStructureFinderFactory implements LogStructureFinderFactory { + + /** + * Rules are: + * - The file must be valid CSV + * - It must contain at least two complete records + * - There must be at least two fields per record (otherwise files with no commas could be treated as CSV!) + * - Every CSV record except the last must have the same number of fields + * The reason the last record is allowed to have fewer fields than the others is that + * it could have been truncated when the file was sampled. + */ + @Override + public boolean canCreateFromSample(List explanation, String sample) { + return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.EXCEL_PREFERENCE, "CSV"); + } + + @Override + public LogStructureFinder createFromSample(List explanation, String sample, String charsetName, Boolean hasByteOrderMarker) + throws IOException { + return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, + CsvPreference.EXCEL_PREFERENCE, false); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreator.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreator.java new file mode 100644 index 0000000000000..186477507acce --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreator.java @@ -0,0 +1,615 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.grok.Grok; +import org.elasticsearch.xpack.ml.logstructurefinder.TimestampFormatFinder.TimestampMatch; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * Creates Grok patterns that will match all provided sample messages. + * + * The choice of field names is quite primitive. The intention is that a human will edit these. 
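Before the class body, a sketch of the intended call sequence, inferred only from the constructor and public methods added in this new file; the sample log lines and the Map<String, Object> type for the mappings are assumptions, not part of the diff:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import org.elasticsearch.common.collect.Tuple;
    import org.elasticsearch.xpack.ml.logstructurefinder.GrokPatternCreator;

    public class GrokPatternCreatorSketch {
        public static void main(String[] args) {
            List<String> explanation = new ArrayList<>();   // reasons are appended as decisions are made
            List<String> samples = Arrays.asList(           // invented sample messages
                "[2018-08-14T12:00:01,000][INFO ][node] starting",
                "[2018-08-14T12:00:02,000][WARN ][node] slow gc");
            Map<String, Object> mappings = new HashMap<>(); // updated with field mappings if non-null
            GrokPatternCreator creator = new GrokPatternCreator(explanation, samples, mappings);

            // Try a whole-line match first; fall back to building a pattern around a seed.
            Tuple<String, String> fullMatch = creator.findFullLineGrokPattern(); // (time field, Grok string) or null
            String grokPattern = fullMatch != null
                ? fullMatch.v2()
                : creator.createGrokPatternFromExamples("TIMESTAMP_ISO8601", "timestamp");
            System.out.println(grokPattern);
        }
    }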
+ */ +public final class GrokPatternCreator { + + private static final Map PUNCTUATION_OR_SPACE_NEEDS_ESCAPING; + static { + HashMap punctuationOrSpaceNeedsEscaping = new HashMap<>(); + String punctuationAndSpaceCharacters = "\"'`‘’“”#@%=\\/|~:;,<>()[]{}«»^$*¿?¡!§¶ \t\n"; + String punctuationThatNeedsEscaping = "\\|()[]{}^$*?"; + punctuationAndSpaceCharacters.chars() + .forEach(c -> punctuationOrSpaceNeedsEscaping.put((char) c, punctuationThatNeedsEscaping.indexOf(c) >= 0)); + PUNCTUATION_OR_SPACE_NEEDS_ESCAPING = Collections.unmodifiableMap(punctuationOrSpaceNeedsEscaping); + } + + private static final String PREFACE = "preface"; + private static final String VALUE = "value"; + private static final String EPILOGUE = "epilogue"; + + /** + * Grok patterns that are designed to match the whole message, not just a part of it. + */ + private static final List FULL_MATCH_GROK_PATTERNS = Arrays.asList( + new FullMatchGrokPatternCandidate("BACULA_LOGLINE", "bts"), + new FullMatchGrokPatternCandidate("CATALINALOG", "timestamp"), + new FullMatchGrokPatternCandidate("COMBINEDAPACHELOG", "timestamp"), + new FullMatchGrokPatternCandidate("COMMONAPACHELOG", "timestamp"), + new FullMatchGrokPatternCandidate("ELB_ACCESS_LOG", "timestamp"), + new FullMatchGrokPatternCandidate("HAPROXYHTTP", "syslog_timestamp"), + new FullMatchGrokPatternCandidate("HAPROXYTCP", "syslog_timestamp"), + new FullMatchGrokPatternCandidate("HTTPD20_ERRORLOG", "timestamp"), + new FullMatchGrokPatternCandidate("HTTPD24_ERRORLOG", "timestamp"), + new FullMatchGrokPatternCandidate("NAGIOSLOGLINE", "nagios_epoch"), + new FullMatchGrokPatternCandidate("NETSCREENSESSIONLOG", "date"), + new FullMatchGrokPatternCandidate("RAILS3", "timestamp"), + new FullMatchGrokPatternCandidate("RUBY_LOGGER", "timestamp"), + new FullMatchGrokPatternCandidate("SHOREWALL", "timestamp"), + new FullMatchGrokPatternCandidate("TOMCATLOG", "timestamp") + ); + + /** + * The first match in this list will be chosen, so it needs to be ordered + * such that more generic patterns come after more specific patterns. + */ + private static final List ORDERED_CANDIDATE_GROK_PATTERNS = Arrays.asList( + new ValueOnlyGrokPatternCandidate("TOMCAT_DATESTAMP", "date", "extra_timestamp"), + new ValueOnlyGrokPatternCandidate("TIMESTAMP_ISO8601", "date", "extra_timestamp"), + new ValueOnlyGrokPatternCandidate("DATESTAMP_RFC822", "date", "extra_timestamp"), + new ValueOnlyGrokPatternCandidate("DATESTAMP_RFC2822", "date", "extra_timestamp"), + new ValueOnlyGrokPatternCandidate("DATESTAMP_OTHER", "date", "extra_timestamp"), + new ValueOnlyGrokPatternCandidate("DATESTAMP_EVENTLOG", "date", "extra_timestamp"), + new ValueOnlyGrokPatternCandidate("SYSLOGTIMESTAMP", "date", "extra_timestamp"), + new ValueOnlyGrokPatternCandidate("HTTPDATE", "date", "extra_timestamp"), + new ValueOnlyGrokPatternCandidate("CATALINA_DATESTAMP", "date", "extra_timestamp"), + new ValueOnlyGrokPatternCandidate("CISCOTIMESTAMP", "date", "extra_timestamp"), + new ValueOnlyGrokPatternCandidate("LOGLEVEL", "keyword", "loglevel"), + new ValueOnlyGrokPatternCandidate("URI", "keyword", "uri"), + new ValueOnlyGrokPatternCandidate("UUID", "keyword", "uuid"), + new ValueOnlyGrokPatternCandidate("MAC", "keyword", "macaddress"), + // Can't use \b as the breaks, because slashes are not "word" characters + new ValueOnlyGrokPatternCandidate("PATH", "keyword", "path", "(? explanation; + private final Collection sampleMessages; + + /** + * It is expected that the mappings will be shared with other code. 
+ * Both this class and other classes will update it. + */ + private final Map mappings; + private final Map fieldNameCountStore = new HashMap<>(); + private final StringBuilder overallGrokPatternBuilder = new StringBuilder(); + + /** + * + * @param explanation List of reasons for making decisions. May contain items when passed and new reasons + * can be appended by the methods of this class. + * @param sampleMessages Sample messages that any Grok pattern found must match. + * @param mappings Will be updated with mappings appropriate for the returned pattern, if non-null. + */ + public GrokPatternCreator(List explanation, Collection sampleMessages, Map mappings) { + this.explanation = explanation; + this.sampleMessages = Collections.unmodifiableCollection(sampleMessages); + this.mappings = mappings; + } + + /** + * This method attempts to find a Grok pattern that will match all of the sample messages in their entirety. + * @return A tuple of (time field name, Grok string), or null if no suitable Grok pattern was found. + */ + public Tuple findFullLineGrokPattern() { + + for (FullMatchGrokPatternCandidate candidate : FULL_MATCH_GROK_PATTERNS) { + if (candidate.matchesAll(sampleMessages)) { + return candidate.processMatch(explanation, sampleMessages, mappings); + } + } + + return null; + } + + /** + * Build a Grok pattern that will match all of the sample messages in their entirety. + * @param seedPatternName A pattern that has already been determined to match some portion of every sample message. + * @param seedFieldName The field name to be used for the portion of every sample message that the seed pattern matches. + * @return The built Grok pattern. + */ + public String createGrokPatternFromExamples(String seedPatternName, String seedFieldName) { + + overallGrokPatternBuilder.setLength(0); + + GrokPatternCandidate seedCandidate = new NoMappingGrokPatternCandidate(seedPatternName, seedFieldName); + + processCandidateAndSplit(seedCandidate, true, sampleMessages, false, 0, false, 0); + + return overallGrokPatternBuilder.toString().replace("\t", "\\t").replace("\n", "\\n"); + } + + /** + * This is purely to allow unit tests to inspect the partial Grok pattern after testing implementation details. + * It should not be used in production code. + */ + StringBuilder getOverallGrokPatternBuilder() { + return overallGrokPatternBuilder; + } + + /** + * Given a chosen Grok pattern and a collection of message snippets, split the snippets into the + * matched section and the pieces before and after it. Recurse to find more matches in the pieces + * before and after and update the supplied string builder. + */ + private void processCandidateAndSplit(GrokPatternCandidate chosenPattern, boolean isLast, Collection snippets, + boolean ignoreKeyValueCandidateLeft, int ignoreValueOnlyCandidatesLeft, + boolean ignoreKeyValueCandidateRight, int ignoreValueOnlyCandidatesRight) { + + Collection prefaces = new ArrayList<>(); + Collection epilogues = new ArrayList<>(); + String patternBuilderContent = chosenPattern.processCaptures(fieldNameCountStore, snippets, prefaces, epilogues, mappings); + appendBestGrokMatchForStrings(false, prefaces, ignoreKeyValueCandidateLeft, ignoreValueOnlyCandidatesLeft); + overallGrokPatternBuilder.append(patternBuilderContent); + appendBestGrokMatchForStrings(isLast, epilogues, ignoreKeyValueCandidateRight, ignoreValueOnlyCandidatesRight); + } + + /** + * Given a collection of message snippets, work out which (if any) of the Grok patterns we're allowed + * to use matches it best. 
Then append the appropriate Grok language to represent that finding onto + * the supplied string builder. + */ + void appendBestGrokMatchForStrings(boolean isLast, Collection snippets, + boolean ignoreKeyValueCandidate, int ignoreValueOnlyCandidates) { + + snippets = adjustForPunctuation(snippets); + + GrokPatternCandidate bestCandidate = null; + if (snippets.isEmpty() == false) { + GrokPatternCandidate kvCandidate = new KeyValueGrokPatternCandidate(explanation); + if (ignoreKeyValueCandidate == false && kvCandidate.matchesAll(snippets)) { + bestCandidate = kvCandidate; + } else { + ignoreKeyValueCandidate = true; + for (GrokPatternCandidate candidate : + ORDERED_CANDIDATE_GROK_PATTERNS.subList(ignoreValueOnlyCandidates, ORDERED_CANDIDATE_GROK_PATTERNS.size())) { + if (candidate.matchesAll(snippets)) { + bestCandidate = candidate; + break; + } + ++ignoreValueOnlyCandidates; + } + } + } + + if (bestCandidate == null) { + if (isLast) { + finalizeGrokPattern(snippets); + } else { + addIntermediateRegex(snippets); + } + } else { + processCandidateAndSplit(bestCandidate, isLast, snippets, true, ignoreValueOnlyCandidates + (ignoreKeyValueCandidate ? 1 : 0), + ignoreKeyValueCandidate, ignoreValueOnlyCandidates); + } + } + + /** + * If the snippets supplied begin with more than 1 character of common punctuation or whitespace + * then add all but the last of these characters to the overall pattern and remove them from the + * snippets. + * @param snippets Input snippets - not modified. + * @return Output snippets, which will be a copy of the input snippets but with whatever characters + * were added to overallPatternBuilder removed from the beginning. + */ + Collection adjustForPunctuation(Collection snippets) { + + assert snippets.isEmpty() == false; + + StringBuilder commonInitialPunctuation = new StringBuilder(); + + for (String snippet : snippets) { + + if (commonInitialPunctuation.length() == 0) { + for (int index = 0; index < snippet.length(); ++index) { + char ch = snippet.charAt(index); + if (PUNCTUATION_OR_SPACE_NEEDS_ESCAPING.get(ch) != null) { + commonInitialPunctuation.append(ch); + } else { + break; + } + } + } else { + if (commonInitialPunctuation.length() > snippet.length()) { + commonInitialPunctuation.delete(snippet.length(), commonInitialPunctuation.length()); + } + for (int index = 0; index < commonInitialPunctuation.length(); ++index) { + char ch = snippet.charAt(index); + if (ch != commonInitialPunctuation.charAt(index)) { + commonInitialPunctuation.delete(index, commonInitialPunctuation.length()); + break; + } + } + } + + if (commonInitialPunctuation.length() <= 1) { + return snippets; + } + } + + int numLiteralCharacters = commonInitialPunctuation.length() - 1; + + for (int index = 0; index < numLiteralCharacters; ++index) { + char ch = commonInitialPunctuation.charAt(index); + if (PUNCTUATION_OR_SPACE_NEEDS_ESCAPING.getOrDefault(ch, false)) { + overallGrokPatternBuilder.append('\\'); + } + overallGrokPatternBuilder.append(ch); + } + + return snippets.stream().map(snippet -> snippet.substring(numLiteralCharacters)).collect(Collectors.toList()); + } + + /** + * The first time a particular field name is passed, simply return it. + * The second time return it with "2" appended. + * The third time return it with "3" appended. + * Etc. + */ + static String buildFieldName(Map fieldNameCountStore, String fieldName) { + Integer numberSeen = fieldNameCountStore.compute(fieldName, (k, v) -> 1 + ((v == null) ? 0 : v)); + return (numberSeen > 1) ? 
fieldName + numberSeen : fieldName; + } + + private void addIntermediateRegex(Collection snippets) { + addIntermediateRegex(overallGrokPatternBuilder, snippets); + } + + public static void addIntermediateRegex(StringBuilder patternBuilder, Collection snippets) { + if (snippets.isEmpty()) { + return; + } + + List others = new ArrayList<>(snippets); + String driver = others.remove(others.size() - 1); + + boolean wildcardRequiredIfNonMatchFound = true; + for (int i = 0; i < driver.length(); ++i) { + char ch = driver.charAt(i); + Boolean punctuationOrSpaceNeedsEscaping = PUNCTUATION_OR_SPACE_NEEDS_ESCAPING.get(ch); + if (punctuationOrSpaceNeedsEscaping != null && others.stream().allMatch(other -> other.indexOf(ch) >= 0)) { + if (wildcardRequiredIfNonMatchFound && others.stream().anyMatch(other -> other.indexOf(ch) > 0)) { + patternBuilder.append(".*?"); + } + if (punctuationOrSpaceNeedsEscaping) { + patternBuilder.append('\\'); + } + patternBuilder.append(ch); + wildcardRequiredIfNonMatchFound = true; + others = others.stream().map(other -> other.substring(other.indexOf(ch) + 1)).collect(Collectors.toList()); + } else if (wildcardRequiredIfNonMatchFound) { + patternBuilder.append(".*?"); + wildcardRequiredIfNonMatchFound = false; + } + } + + if (wildcardRequiredIfNonMatchFound && others.stream().anyMatch(s -> s.isEmpty() == false)) { + patternBuilder.append(".*?"); + } + } + + private void finalizeGrokPattern(Collection snippets) { + if (snippets.stream().allMatch(String::isEmpty)) { + return; + } + + List others = new ArrayList<>(snippets); + String driver = others.remove(others.size() - 1); + + for (int i = 0; i < driver.length(); ++i) { + char ch = driver.charAt(i); + int driverIndex = i; + Boolean punctuationOrSpaceNeedsEscaping = PUNCTUATION_OR_SPACE_NEEDS_ESCAPING.get(ch); + if (punctuationOrSpaceNeedsEscaping != null && + others.stream().allMatch(other -> other.length() > driverIndex && other.charAt(driverIndex) == ch)) { + if (punctuationOrSpaceNeedsEscaping) { + overallGrokPatternBuilder.append('\\'); + } + overallGrokPatternBuilder.append(ch); + if (i == driver.length() - 1 && others.stream().allMatch(driver::equals)) { + return; + } + } else { + break; + } + } + + overallGrokPatternBuilder.append(".*"); + } + + interface GrokPatternCandidate { + + /** + * @return Does this Grok pattern candidate match all the snippets? + */ + boolean matchesAll(Collection snippets); + + /** + * After it has been determined that this Grok pattern candidate matches a collection of strings, + * return collections of the bits that come before (prefaces) and after (epilogues) the bit + * that matches. Also update mappings with the most appropriate field name and type. + * @return The string that needs to be incorporated into the overall Grok pattern for the line. + */ + String processCaptures(Map fieldNameCountStore, Collection snippets, Collection prefaces, + Collection epilogues, Map mappings); + } + + /** + * A Grok pattern candidate that will match a single named Grok pattern. + */ + static class ValueOnlyGrokPatternCandidate implements GrokPatternCandidate { + + private final String grokPatternName; + private final String mappingType; + private final String fieldName; + private final Grok grok; + + /** + * Pre/post breaks default to \b, but this may not be appropriate for Grok patterns that start or + * end with a non "word" character (i.e. letter, number or underscore). For such patterns use one + * of the other constructors. + *
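+         * For example, the PATH candidate above supplies explicit lookaround breaks because a
+         * path such as {@code /var/log/messages} begins with a slash, which is not a "word"
+         * character, so {@code \b} cannot mark a break in front of it.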

+ * In cases where the Grok pattern defined by Logstash already includes conditions on what must + * come before and after the match, use one of the other constructors and specify an empty string + * for the pre and/or post breaks. + * + * @param grokPatternName Name of the Grok pattern to try to match - must match one defined in Logstash. + * @param fieldName Name of the field to extract from the match. + */ + ValueOnlyGrokPatternCandidate(String grokPatternName, String mappingType, String fieldName) { + this(grokPatternName, mappingType, fieldName, "\\b", "\\b"); + } + + /** + * @param grokPatternName Name of the Grok pattern to try to match - must match one defined in Logstash. + * @param mappingType Data type for field in Elasticsearch mappings. + * @param fieldName Name of the field to extract from the match. + * @param preBreak Only consider the match if it's broken from the previous text by this. + * @param postBreak Only consider the match if it's broken from the following text by this. + */ + ValueOnlyGrokPatternCandidate(String grokPatternName, String mappingType, String fieldName, String preBreak, String postBreak) { + this.grokPatternName = grokPatternName; + this.mappingType = mappingType; + this.fieldName = fieldName; + // The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java + grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + preBreak + + "%{" + grokPatternName + ":" + VALUE + "}" + postBreak + "%{GREEDYDATA:" + EPILOGUE + "}"); + } + + @Override + public boolean matchesAll(Collection snippets) { + return snippets.stream().allMatch(grok::match); + } + + /** + * Given a collection of strings, and a Grok pattern that matches some part of them all, + * return collections of the bits that come before (prefaces) and after (epilogues) the + * bit that matches. + */ + @Override + public String processCaptures(Map fieldNameCountStore, Collection snippets, Collection prefaces, + Collection epilogues, Map mappings) { + String sampleValue = null; + for (String snippet : snippets) { + Map captures = grok.captures(snippet); + // If the pattern doesn't match then captures will be null + if (captures == null) { + throw new IllegalStateException("[%{" + grokPatternName + "}] does not match snippet [" + snippet + "]"); + } + prefaces.add(captures.getOrDefault(PREFACE, "").toString()); + if (sampleValue == null) { + sampleValue = captures.get(VALUE).toString(); + } + epilogues.add(captures.getOrDefault(EPILOGUE, "").toString()); + } + String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName); + if (mappings != null) { + Map fullMappingType = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, mappingType); + if ("date".equals(mappingType)) { + TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(sampleValue); + if (timestampMatch != null) { + fullMappingType = timestampMatch.getEsDateMappingTypeWithFormat(); + } + } + mappings.put(adjustedFieldName, fullMappingType); + } + return "%{" + grokPatternName + ":" + adjustedFieldName + "}"; + } + } + + /** + * Unlike the {@link ValueOnlyGrokPatternCandidate} an object of this class is not immutable and not thread safe. + * When a given object matches a set of strings it chooses a field name. Then that same field name is used when + * processing captures from the pattern. Hence only a single thread may use any particular instance of this + * class. 
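+     * As an illustrative example (names invented): if every snippet contains text like
+     * {@code "user=alice session=f3a1"}, {@code matchesAll} narrows the candidate names to
+     * {@code user} and {@code session}, fixes {@code fieldName} to the first, {@code user},
+     * and {@code processCaptures} then builds its Grok pattern around that name.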
+ */ + static class KeyValueGrokPatternCandidate implements GrokPatternCandidate { + + private static final Pattern kvFinder = Pattern.compile("\\b(\\w+)=[\\w.-]+"); + private final List explanation; + private String fieldName; + + KeyValueGrokPatternCandidate(List explanation) { + this.explanation = explanation; + } + + @Override + public boolean matchesAll(Collection snippets) { + Set candidateNames = new LinkedHashSet<>(); + boolean isFirst = true; + for (String snippet : snippets) { + if (isFirst) { + Matcher matcher = kvFinder.matcher(snippet); + while (matcher.find()) { + candidateNames.add(matcher.group(1)); + } + isFirst = false; + } else { + candidateNames.removeIf(candidateName -> + Pattern.compile("\\b" + candidateName + "=[\\w.-]+").matcher(snippet).find() == false); + } + if (candidateNames.isEmpty()) { + break; + } + } + return (fieldName = candidateNames.stream().findFirst().orElse(null)) != null; + } + + @Override + public String processCaptures(Map fieldNameCountStore, Collection snippets, Collection prefaces, + Collection epilogues, Map mappings) { + if (fieldName == null) { + throw new IllegalStateException("Cannot process KV matches until a field name has been determined"); + } + Grok grok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}\\b" + + fieldName + "=%{USER:" + VALUE + "}%{GREEDYDATA:" + EPILOGUE + "}"); + Collection values = new ArrayList<>(); + for (String snippet : snippets) { + Map captures = grok.captures(snippet); + // If the pattern doesn't match then captures will be null + if (captures == null) { + throw new IllegalStateException("[\\b" + fieldName + "=%{USER}] does not match snippet [" + snippet + "]"); + } + prefaces.add(captures.getOrDefault(PREFACE, "").toString()); + values.add(captures.getOrDefault(VALUE, "").toString()); + epilogues.add(captures.getOrDefault(EPILOGUE, "").toString()); + } + String adjustedFieldName = buildFieldName(fieldNameCountStore, fieldName); + if (mappings != null) { + mappings.put(adjustedFieldName, LogStructureUtils.guessScalarMapping(explanation, adjustedFieldName, values)); + } + return "\\b" + fieldName + "=%{USER:" + adjustedFieldName + "}"; + } + } + + /** + * A Grok pattern candidate that matches a single named Grok pattern but will not update mappings. + */ + static class NoMappingGrokPatternCandidate extends ValueOnlyGrokPatternCandidate { + + NoMappingGrokPatternCandidate(String grokPatternName, String fieldName) { + super(grokPatternName, null, fieldName); + } + + @Override + public String processCaptures(Map fieldNameCountStore, Collection snippets, Collection prefaces, + Collection epilogues, Map mappings) { + return super.processCaptures(fieldNameCountStore, snippets, prefaces, epilogues, null); + } + } + + /** + * Used to check whether a single Grok pattern matches every sample message in its entirety. + */ + static class FullMatchGrokPatternCandidate { + + private final String grokString; + private final String timeField; + private final Grok grok; + + FullMatchGrokPatternCandidate(String grokPatternName, String timeField) { + grokString = "%{" + grokPatternName + "}"; + this.timeField = timeField; + grok = new Grok(Grok.getBuiltinPatterns(), grokString); + } + + public boolean matchesAll(Collection sampleMessages) { + return sampleMessages.stream().allMatch(grok::match); + } + + /** + * This must only be called if {@link #matchesAll} returns true. + * @return A tuple of (time field name, Grok string). 
+         */
+        public Tuple<String, String> processMatch(List<String> explanation, Collection<String> sampleMessages,
+                                                  Map<String, Object> mappings) {
+
+            explanation.add("A full message Grok pattern [" + grokString.substring(2, grokString.length() - 1) + "] looks appropriate");
+
+            if (mappings != null) {
+                Map<String, Collection<String>> valuesPerField = new HashMap<>();
+
+                for (String sampleMessage : sampleMessages) {
+                    Map<String, Object> captures = grok.captures(sampleMessage);
+                    // If the pattern doesn't match then captures will be null
+                    if (captures == null) {
+                        throw new IllegalStateException("[" + grokString + "] does not match snippet [" + sampleMessage + "]");
+                    }
+                    for (Map.Entry<String, Object> capture : captures.entrySet()) {
+
+                        String fieldName = capture.getKey();
+                        String fieldValue = capture.getValue().toString();
+
+                        // Exclude the time field because that will be dropped and replaced with @timestamp
+                        if (fieldName.equals(timeField) == false) {
+                            valuesPerField.compute(fieldName, (k, v) -> {
+                                if (v == null) {
+                                    return new ArrayList<>(Collections.singletonList(fieldValue));
+                                } else {
+                                    v.add(fieldValue);
+                                    return v;
+                                }
+                            });
+                        }
+                    }
+                }
+
+                for (Map.Entry<String, Collection<String>> valuesForField : valuesPerField.entrySet()) {
+                    String fieldName = valuesForField.getKey();
+                    mappings.put(fieldName,
+                        LogStructureUtils.guessScalarMapping(explanation, fieldName, valuesForField.getValue()));
+                }
+            }
+
+            return new Tuple<>(timeField, grokString);
+        }
+    }
+}
diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinder.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinder.java
new file mode 100644
index 0000000000000..98e8a0213fbef
--- /dev/null
+++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinder.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.logstructurefinder;
+
+import org.elasticsearch.common.collect.Tuple;
+import org.elasticsearch.common.xcontent.DeprecationHandler;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.xpack.ml.logstructurefinder.TimestampFormatFinder.TimestampMatch;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.stream.Collectors;
+
+import static org.elasticsearch.common.xcontent.json.JsonXContent.jsonXContent;
+
+/**
+ * Really ND-JSON (newline-delimited JSON).
+ */
+public class JsonLogStructureFinder implements LogStructureFinder {
+
+    private final List<String> sampleMessages;
+    private final LogStructure structure;
+
+    static JsonLogStructureFinder makeJsonLogStructureFinder(List<String> explanation, String sample, String charsetName,
+                                                             Boolean hasByteOrderMarker) throws IOException {
+
+        List<Map<String, ?>> sampleRecords = new ArrayList<>();
+
+        List<String> sampleMessages = Arrays.asList(sample.split("\n"));
+        for (String sampleMessage : sampleMessages) {
+            XContentParser parser = jsonXContent.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
+                sampleMessage);
+            sampleRecords.add(parser.mapOrdered());
+        }
+
+        LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.JSON)
+            .setCharset(charsetName)
+            .setHasByteOrderMarker(hasByteOrderMarker)
+            .setSampleStart(sampleMessages.stream().limit(2).collect(Collectors.joining("\n", "", "\n")))
+            .setNumLinesAnalyzed(sampleMessages.size())
+            .setNumMessagesAnalyzed(sampleRecords.size());
+
+        Tuple<String, TimestampMatch> timeField = LogStructureUtils.guessTimestampField(explanation, sampleRecords);
+        if (timeField != null) {
+            structureBuilder.setTimestampField(timeField.v1())
+                .setTimestampFormats(timeField.v2().dateFormats)
+                .setNeedClientTimezone(timeField.v2().hasTimezoneDependentParsing());
+        }
+
+        SortedMap<String, Object> mappings = LogStructureUtils.guessMappings(explanation, sampleRecords);
+        mappings.put(LogStructureUtils.DEFAULT_TIMESTAMP_FIELD, Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "date"));
+
+        LogStructure structure = structureBuilder
+            .setMappings(mappings)
+            .setExplanation(explanation)
+            .build();
+
+        return new JsonLogStructureFinder(sampleMessages, structure);
+    }
+
+    private JsonLogStructureFinder(List<String> sampleMessages, LogStructure structure) {
+        this.sampleMessages = Collections.unmodifiableList(sampleMessages);
+        this.structure = structure;
+    }
+
+    @Override
+    public List<String> getSampleMessages() {
+        return sampleMessages;
+    }
+
+    @Override
+    public LogStructure getStructure() {
+        return structure;
+    }
+}
diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderFactory.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderFactory.java
new file mode 100644
index 0000000000000..c5da103eb0560
--- /dev/null
+++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderFactory.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.logstructurefinder;
+
+import org.elasticsearch.common.xcontent.DeprecationHandler;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.common.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.List;
+import java.util.Locale;
+
+import static org.elasticsearch.common.xcontent.json.JsonXContent.jsonXContent;
+
+public class JsonLogStructureFinderFactory implements LogStructureFinderFactory {
+
+    /**
+     * This format matches if the sample consists of one or more JSON documents.
+     * If there is more than one, they must be newline-delimited.
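+     * For example (values invented for illustration), a two-line sample such as
+     * <pre>{@code
+     * {"ts":"2018-09-03T17:05:23","message":"foo"}
+     * {"ts":"2018-09-03T17:05:24","message":"bar"}
+     * }</pre>
+     * matches.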
The + * documents must be non-empty, to prevent lines containing "{}" from matching. + */ + @Override + public boolean canCreateFromSample(List explanation, String sample) { + + int completeDocCount = 0; + + try { + String[] sampleLines = sample.split("\n"); + for (String sampleLine : sampleLines) { + try (XContentParser parser = jsonXContent.createParser(NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, new ContextPrintingStringReader(sampleLine))) { + + if (parser.map().isEmpty()) { + explanation.add("Not JSON because an empty object was parsed: [" + sampleLine + "]"); + return false; + } + ++completeDocCount; + if (parser.nextToken() != null) { + explanation.add("Not newline delimited JSON because a line contained more than a single object: [" + + sampleLine + "]"); + return false; + } + } + } + } catch (IOException | IllegalStateException e) { + explanation.add("Not JSON because there was a parsing exception: [" + e.getMessage().replaceAll("\\s?\r?\n\\s?", " ") + "]"); + return false; + } + + if (completeDocCount == 0) { + explanation.add("Not JSON because sample didn't contain a complete document"); + return false; + } + + explanation.add("Deciding sample is newline delimited JSON"); + return true; + } + + @Override + public LogStructureFinder createFromSample(List explanation, String sample, String charsetName, Boolean hasByteOrderMarker) + throws IOException { + return JsonLogStructureFinder.makeJsonLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker); + } + + private static class ContextPrintingStringReader extends StringReader { + + private final String str; + + ContextPrintingStringReader(String str) { + super(str); + this.str = str; + } + + @Override + public String toString() { + if (str.length() <= 80) { + return String.format(Locale.ROOT, "\"%s\"", str); + } else { + return String.format(Locale.ROOT, "\"%.77s...\"", str); + } + } + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructure.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructure.java new file mode 100644 index 0000000000000..64a00d20899c1 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructure.java @@ -0,0 +1,614 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * Stores the log file format determined by a {@link LogStructureFinder}. 
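+ * As a hedged illustration (values invented, not exhaustive), the object serializes via
+ * {@code toXContent} to something like:
+ * <pre>{@code
+ * {
+ *   "num_lines_analyzed" : 1000,
+ *   "num_messages_analyzed" : 1000,
+ *   "charset" : "UTF-8",
+ *   "format" : "json",
+ *   "timestamp_field" : "ts",
+ *   "timestamp_formats" : [ "ISO8601" ],
+ *   "need_client_timezone" : false,
+ *   "mappings" : { "...": "..." }
+ * }
+ * }</pre>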
+ */ +public class LogStructure implements ToXContentObject { + + public enum Format { + + JSON, XML, CSV, TSV, SEMI_COLON_SEPARATED_VALUES, PIPE_SEPARATED_VALUES, SEMI_STRUCTURED_TEXT; + + public Character separator() { + switch (this) { + case JSON: + case XML: + return null; + case CSV: + return ','; + case TSV: + return '\t'; + case SEMI_COLON_SEPARATED_VALUES: + return ';'; + case PIPE_SEPARATED_VALUES: + return '|'; + case SEMI_STRUCTURED_TEXT: + return null; + default: + throw new IllegalStateException("enum value [" + this + "] missing from switch."); + } + } + + public boolean supportsNesting() { + switch (this) { + case JSON: + case XML: + return true; + case CSV: + case TSV: + case SEMI_COLON_SEPARATED_VALUES: + case PIPE_SEPARATED_VALUES: + case SEMI_STRUCTURED_TEXT: + return false; + default: + throw new IllegalStateException("enum value [" + this + "] missing from switch."); + } + } + + public boolean isStructured() { + switch (this) { + case JSON: + case XML: + case CSV: + case TSV: + case SEMI_COLON_SEPARATED_VALUES: + case PIPE_SEPARATED_VALUES: + return true; + case SEMI_STRUCTURED_TEXT: + return false; + default: + throw new IllegalStateException("enum value [" + this + "] missing from switch."); + } + } + + public boolean isSemiStructured() { + switch (this) { + case JSON: + case XML: + case CSV: + case TSV: + case SEMI_COLON_SEPARATED_VALUES: + case PIPE_SEPARATED_VALUES: + return false; + case SEMI_STRUCTURED_TEXT: + return true; + default: + throw new IllegalStateException("enum value [" + this + "] missing from switch."); + } + } + + public boolean isSeparatedValues() { + switch (this) { + case JSON: + case XML: + return false; + case CSV: + case TSV: + case SEMI_COLON_SEPARATED_VALUES: + case PIPE_SEPARATED_VALUES: + return true; + case SEMI_STRUCTURED_TEXT: + return false; + default: + throw new IllegalStateException("enum value [" + this + "] missing from switch."); + } + } + + public static Format fromSeparator(char separator) { + switch (separator) { + case ',': + return CSV; + case '\t': + return TSV; + case ';': + return SEMI_COLON_SEPARATED_VALUES; + case '|': + return PIPE_SEPARATED_VALUES; + default: + throw new IllegalArgumentException("No known format has separator [" + separator + "]"); + } + } + + public static Format fromString(String name) { + return valueOf(name.trim().toUpperCase(Locale.ROOT)); + } + + @Override + public String toString() { + return name().toLowerCase(Locale.ROOT); + } + } + + static final ParseField NUM_LINES_ANALYZED = new ParseField("num_lines_analyzed"); + static final ParseField NUM_MESSAGES_ANALYZED = new ParseField("num_messages_analyzed"); + static final ParseField SAMPLE_START = new ParseField("sample_start"); + static final ParseField CHARSET = new ParseField("charset"); + static final ParseField HAS_BYTE_ORDER_MARKER = new ParseField("has_byte_order_marker"); + static final ParseField STRUCTURE = new ParseField("format"); + static final ParseField MULTILINE_START_PATTERN = new ParseField("multiline_start_pattern"); + static final ParseField EXCLUDE_LINES_PATTERN = new ParseField("exclude_lines_pattern"); + static final ParseField INPUT_FIELDS = new ParseField("input_fields"); + static final ParseField HAS_HEADER_ROW = new ParseField("has_header_row"); + static final ParseField SEPARATOR = new ParseField("separator"); + static final ParseField SHOULD_TRIM_FIELDS = new ParseField("should_trim_fields"); + static final ParseField GROK_PATTERN = new ParseField("grok_pattern"); + static final ParseField TIMESTAMP_FIELD = new 
ParseField("timestamp_field"); + static final ParseField TIMESTAMP_FORMATS = new ParseField("timestamp_formats"); + static final ParseField NEED_CLIENT_TIMEZONE = new ParseField("need_client_timezone"); + static final ParseField MAPPINGS = new ParseField("mappings"); + static final ParseField EXPLANATION = new ParseField("explanation"); + + public static final ObjectParser PARSER = new ObjectParser<>("log_file_structure", false, Builder::new); + + static { + PARSER.declareInt(Builder::setNumLinesAnalyzed, NUM_LINES_ANALYZED); + PARSER.declareInt(Builder::setNumMessagesAnalyzed, NUM_MESSAGES_ANALYZED); + PARSER.declareString(Builder::setSampleStart, SAMPLE_START); + PARSER.declareString(Builder::setCharset, CHARSET); + PARSER.declareBoolean(Builder::setHasByteOrderMarker, HAS_BYTE_ORDER_MARKER); + PARSER.declareString((p, c) -> p.setFormat(Format.fromString(c)), STRUCTURE); + PARSER.declareString(Builder::setMultilineStartPattern, MULTILINE_START_PATTERN); + PARSER.declareString(Builder::setExcludeLinesPattern, EXCLUDE_LINES_PATTERN); + PARSER.declareStringArray(Builder::setInputFields, INPUT_FIELDS); + PARSER.declareBoolean(Builder::setHasHeaderRow, HAS_HEADER_ROW); + PARSER.declareString((p, c) -> p.setSeparator(c.charAt(0)), SEPARATOR); + PARSER.declareBoolean(Builder::setShouldTrimFields, SHOULD_TRIM_FIELDS); + PARSER.declareString(Builder::setGrokPattern, GROK_PATTERN); + PARSER.declareString(Builder::setTimestampField, TIMESTAMP_FIELD); + PARSER.declareStringArray(Builder::setTimestampFormats, TIMESTAMP_FORMATS); + PARSER.declareBoolean(Builder::setNeedClientTimezone, NEED_CLIENT_TIMEZONE); + PARSER.declareObject(Builder::setMappings, (p, c) -> new TreeMap<>(p.map()), MAPPINGS); + PARSER.declareStringArray(Builder::setExplanation, EXPLANATION); + } + + private final int numLinesAnalyzed; + private final int numMessagesAnalyzed; + private final String sampleStart; + private final String charset; + private final Boolean hasByteOrderMarker; + private final Format format; + private final String multilineStartPattern; + private final String excludeLinesPattern; + private final List inputFields; + private final Boolean hasHeaderRow; + private final Character separator; + private final Boolean shouldTrimFields; + private final String grokPattern; + private final List timestampFormats; + private final String timestampField; + private final boolean needClientTimezone; + private final SortedMap mappings; + private final List explanation; + + public LogStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker, + Format format, String multilineStartPattern, String excludeLinesPattern, List inputFields, + Boolean hasHeaderRow, Character separator, Boolean shouldTrimFields, String grokPattern, String timestampField, + List timestampFormats, boolean needClientTimezone, Map mappings, + List explanation) { + + this.numLinesAnalyzed = numLinesAnalyzed; + this.numMessagesAnalyzed = numMessagesAnalyzed; + this.sampleStart = Objects.requireNonNull(sampleStart); + this.charset = Objects.requireNonNull(charset); + this.hasByteOrderMarker = hasByteOrderMarker; + this.format = Objects.requireNonNull(format); + this.multilineStartPattern = multilineStartPattern; + this.excludeLinesPattern = excludeLinesPattern; + this.inputFields = (inputFields == null) ? 
null : Collections.unmodifiableList(new ArrayList<>(inputFields)); + this.hasHeaderRow = hasHeaderRow; + this.separator = separator; + this.shouldTrimFields = shouldTrimFields; + this.grokPattern = grokPattern; + this.timestampField = timestampField; + this.timestampFormats = (timestampFormats == null) ? null : Collections.unmodifiableList(new ArrayList<>(timestampFormats)); + this.needClientTimezone = needClientTimezone; + this.mappings = Collections.unmodifiableSortedMap(new TreeMap<>(mappings)); + this.explanation = Collections.unmodifiableList(new ArrayList<>(explanation)); + } + + public int getNumLinesAnalyzed() { + return numLinesAnalyzed; + } + + public int getNumMessagesAnalyzed() { + return numMessagesAnalyzed; + } + + public String getSampleStart() { + return sampleStart; + } + + public String getCharset() { + return charset; + } + + public Boolean getHasByteOrderMarker() { + return hasByteOrderMarker; + } + + public Format getFormat() { + return format; + } + + public String getMultilineStartPattern() { + return multilineStartPattern; + } + + public String getExcludeLinesPattern() { + return excludeLinesPattern; + } + + public List getInputFields() { + return inputFields; + } + + public Boolean getHasHeaderRow() { + return hasHeaderRow; + } + + public Character getSeparator() { + return separator; + } + + public Boolean getShouldTrimFields() { + return shouldTrimFields; + } + + public String getGrokPattern() { + return grokPattern; + } + + public String getTimestampField() { + return timestampField; + } + + public List getTimestampFormats() { + return timestampFormats; + } + + public boolean needClientTimezone() { + return needClientTimezone; + } + + public SortedMap getMappings() { + return mappings; + } + + public List getExplanation() { + return explanation; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + + builder.startObject(); + builder.field(NUM_LINES_ANALYZED.getPreferredName(), numLinesAnalyzed); + builder.field(NUM_MESSAGES_ANALYZED.getPreferredName(), numMessagesAnalyzed); + builder.field(SAMPLE_START.getPreferredName(), sampleStart); + builder.field(CHARSET.getPreferredName(), charset); + if (hasByteOrderMarker != null) { + builder.field(HAS_BYTE_ORDER_MARKER.getPreferredName(), hasByteOrderMarker.booleanValue()); + } + builder.field(STRUCTURE.getPreferredName(), format); + if (multilineStartPattern != null && multilineStartPattern.isEmpty() == false) { + builder.field(MULTILINE_START_PATTERN.getPreferredName(), multilineStartPattern); + } + if (excludeLinesPattern != null && excludeLinesPattern.isEmpty() == false) { + builder.field(EXCLUDE_LINES_PATTERN.getPreferredName(), excludeLinesPattern); + } + if (inputFields != null && inputFields.isEmpty() == false) { + builder.field(INPUT_FIELDS.getPreferredName(), inputFields); + } + if (hasHeaderRow != null) { + builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow.booleanValue()); + } + if (separator != null) { + builder.field(SEPARATOR.getPreferredName(), String.valueOf(separator)); + } + if (shouldTrimFields != null) { + builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields.booleanValue()); + } + if (grokPattern != null && grokPattern.isEmpty() == false) { + builder.field(GROK_PATTERN.getPreferredName(), grokPattern); + } + if (timestampField != null && timestampField.isEmpty() == false) { + builder.field(TIMESTAMP_FIELD.getPreferredName(), timestampField); + } + if (timestampFormats != null && timestampFormats.isEmpty() == 
false) { + builder.field(TIMESTAMP_FORMATS.getPreferredName(), timestampFormats); + } + builder.field(NEED_CLIENT_TIMEZONE.getPreferredName(), needClientTimezone); + builder.field(MAPPINGS.getPreferredName(), mappings); + builder.field(EXPLANATION.getPreferredName(), explanation); + builder.endObject(); + + return builder; + } + + @Override + public int hashCode() { + + return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format, + multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern, timestampField, + timestampFormats, needClientTimezone, mappings, explanation); + } + + @Override + public boolean equals(Object other) { + + if (this == other) { + return true; + } + + if (other == null || getClass() != other.getClass()) { + return false; + } + + LogStructure that = (LogStructure) other; + return this.numLinesAnalyzed == that.numLinesAnalyzed && + this.numMessagesAnalyzed == that.numMessagesAnalyzed && + this.needClientTimezone == that.needClientTimezone && + Objects.equals(this.sampleStart, that.sampleStart) && + Objects.equals(this.charset, that.charset) && + Objects.equals(this.hasByteOrderMarker, that.hasByteOrderMarker) && + Objects.equals(this.format, that.format) && + Objects.equals(this.multilineStartPattern, that.multilineStartPattern) && + Objects.equals(this.excludeLinesPattern, that.excludeLinesPattern) && + Objects.equals(this.inputFields, that.inputFields) && + Objects.equals(this.hasHeaderRow, that.hasHeaderRow) && + Objects.equals(this.separator, that.separator) && + Objects.equals(this.shouldTrimFields, that.shouldTrimFields) && + Objects.equals(this.grokPattern, that.grokPattern) && + Objects.equals(this.timestampField, that.timestampField) && + Objects.equals(this.timestampFormats, that.timestampFormats) && + Objects.equals(this.mappings, that.mappings) && + Objects.equals(this.explanation, that.explanation); + } + + public static class Builder { + + private int numLinesAnalyzed; + private int numMessagesAnalyzed; + private String sampleStart; + private String charset; + private Boolean hasByteOrderMarker; + private Format format; + private String multilineStartPattern; + private String excludeLinesPattern; + private List inputFields; + private Boolean hasHeaderRow; + private Character separator; + private Boolean shouldTrimFields; + private String grokPattern; + private String timestampField; + private List timestampFormats; + private boolean needClientTimezone; + private Map mappings; + private List explanation; + + public Builder() { + this(Format.SEMI_STRUCTURED_TEXT); + } + + public Builder(Format format) { + setFormat(format); + } + + public Builder setNumLinesAnalyzed(int numLinesAnalyzed) { + this.numLinesAnalyzed = numLinesAnalyzed; + return this; + } + + public Builder setNumMessagesAnalyzed(int numMessagesAnalyzed) { + this.numMessagesAnalyzed = numMessagesAnalyzed; + return this; + } + + public Builder setSampleStart(String sampleStart) { + this.sampleStart = Objects.requireNonNull(sampleStart); + return this; + } + + public Builder setCharset(String charset) { + this.charset = Objects.requireNonNull(charset); + return this; + } + + public Builder setHasByteOrderMarker(Boolean hasByteOrderMarker) { + this.hasByteOrderMarker = hasByteOrderMarker; + return this; + } + + public Builder setFormat(Format format) { + this.format = Objects.requireNonNull(format); + this.separator = format.separator(); + return this; + } + + public Builder 
setMultilineStartPattern(String multilineStartPattern) { + this.multilineStartPattern = multilineStartPattern; + return this; + } + + public Builder setExcludeLinesPattern(String excludeLinesPattern) { + this.excludeLinesPattern = excludeLinesPattern; + return this; + } + + public Builder setInputFields(List inputFields) { + this.inputFields = inputFields; + return this; + } + + public Builder setHasHeaderRow(Boolean hasHeaderRow) { + this.hasHeaderRow = hasHeaderRow; + return this; + } + + public Builder setShouldTrimFields(Boolean shouldTrimFields) { + this.shouldTrimFields = shouldTrimFields; + return this; + } + + public Builder setSeparator(Character separator) { + this.separator = separator; + return this; + } + + public Builder setGrokPattern(String grokPattern) { + this.grokPattern = grokPattern; + return this; + } + + public Builder setTimestampField(String timestampField) { + this.timestampField = timestampField; + return this; + } + + public Builder setTimestampFormats(List timestampFormats) { + this.timestampFormats = timestampFormats; + return this; + } + + public Builder setNeedClientTimezone(boolean needClientTimezone) { + this.needClientTimezone = needClientTimezone; + return this; + } + + public Builder setMappings(Map mappings) { + this.mappings = Objects.requireNonNull(mappings); + return this; + } + + public Builder setExplanation(List explanation) { + this.explanation = Objects.requireNonNull(explanation); + return this; + } + + @SuppressWarnings("fallthrough") + public LogStructure build() { + + if (numLinesAnalyzed <= 0) { + throw new IllegalArgumentException("Number of lines analyzed must be positive."); + } + + if (numMessagesAnalyzed <= 0) { + throw new IllegalArgumentException("Number of messages analyzed must be positive."); + } + + if (numMessagesAnalyzed > numLinesAnalyzed) { + throw new IllegalArgumentException("Number of messages analyzed cannot be greater than number of lines analyzed."); + } + + if (sampleStart == null || sampleStart.isEmpty()) { + throw new IllegalArgumentException("Sample start must be specified."); + } + + if (charset == null || charset.isEmpty()) { + throw new IllegalArgumentException("A character set must be specified."); + } + + if (charset.toUpperCase(Locale.ROOT).startsWith("UTF") == false && hasByteOrderMarker != null) { + throw new IllegalArgumentException("A byte order marker is only possible for UTF character sets."); + } + + switch (format) { + case JSON: + if (shouldTrimFields != null) { + throw new IllegalArgumentException("Should trim fields may not be specified for [" + format + "] structures."); + } + // $FALL-THROUGH$ + case XML: + if (hasHeaderRow != null) { + throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures."); + } + if (separator != null) { + throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures."); + } + if (grokPattern != null) { + throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures."); + } + break; + case CSV: + case TSV: + case SEMI_COLON_SEPARATED_VALUES: + case PIPE_SEPARATED_VALUES: + if (inputFields == null || inputFields.isEmpty()) { + throw new IllegalArgumentException("Input fields must be specified for [" + format + "] structures."); + } + if (hasHeaderRow == null) { + throw new IllegalArgumentException("Has header row must be specified for [" + format + "] structures."); + } + Character expectedSeparator = format.separator(); + assert expectedSeparator != 
null; + if (expectedSeparator.equals(separator) == false) { + throw new IllegalArgumentException("Separator must be [" + expectedSeparator + "] for [" + format + + "] structures."); + } + if (grokPattern != null) { + throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures."); + } + break; + case SEMI_STRUCTURED_TEXT: + if (inputFields != null) { + throw new IllegalArgumentException("Input fields may not be specified for [" + format + "] structures."); + } + if (hasHeaderRow != null) { + throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures."); + } + if (separator != null) { + throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures."); + } + if (shouldTrimFields != null) { + throw new IllegalArgumentException("Should trim fields may not be specified for [" + format + "] structures."); + } + if (grokPattern == null || grokPattern.isEmpty()) { + throw new IllegalArgumentException("Grok pattern must be specified for [" + format + "] structures."); + } + break; + default: + throw new IllegalStateException("enum value [" + format + "] missing from switch."); + } + + if ((timestampField == null) != (timestampFormats == null || timestampFormats.isEmpty())) { + throw new IllegalArgumentException("Timestamp field and timestamp formats must both be specified or neither be specified."); + } + + if (needClientTimezone && timestampField == null) { + throw new IllegalArgumentException("Client timezone cannot be needed if there is no timestamp field."); + } + + if (mappings == null || mappings.isEmpty()) { + throw new IllegalArgumentException("Mappings must be specified."); + } + + if (explanation == null || explanation.isEmpty()) { + throw new IllegalArgumentException("Explanation must be specified."); + } + + return new LogStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format, + multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern, + timestampField, timestampFormats, needClientTimezone, mappings, explanation); + } + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinder.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinder.java new file mode 100644 index 0000000000000..ea2e9efc5fb34 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinder.java @@ -0,0 +1,23 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import java.util.List; + +public interface LogStructureFinder { + + /** + * The (possibly multi-line) messages that the log sample was split into. + * @return A list of messages. + */ + List getSampleMessages(); + + /** + * Retrieve the structure of the log file used to instantiate the finder. + * @return The log file structure. 
+ */ + LogStructure getStructure(); +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderFactory.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderFactory.java new file mode 100644 index 0000000000000..af322ee4bf0e0 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderFactory.java @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import java.util.List; + +public interface LogStructureFinderFactory { + + /** + * Given a sample of a log file, decide whether this factory will be able + * to create an appropriate object to represent its ingestion configs. + * @param explanation List of reasons for making decisions. May contain items when passed and new reasons + * can be appended by this method. + * @param sample A sample from the log file to be ingested. + * @return true if this factory can create an appropriate log + * file structure given the sample; otherwise false. + */ + boolean canCreateFromSample(List explanation, String sample); + + /** + * Create an object representing the structure of a log file. + * @param explanation List of reasons for making decisions. May contain items when passed and new reasons + * can be appended by this method. + * @param sample A sample from the log file to be ingested. + * @param charsetName The name of the character set in which the sample was provided. + * @param hasByteOrderMarker Did the sample have a byte order marker? null means "not relevant". + * @return A log file structure object suitable for ingesting the supplied sample. + * @throws Exception if something goes wrong during creation. + */ + LogStructureFinder createFromSample(List explanation, String sample, String charsetName, Boolean hasByteOrderMarker) + throws Exception; +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManager.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManager.java new file mode 100644 index 0000000000000..7f18445e505e3 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManager.java @@ -0,0 +1,232 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import com.ibm.icu.text.CharsetDetector; +import com.ibm.icu.text.CharsetMatch; +import org.elasticsearch.common.collect.Tuple; + +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Optional; +import java.util.Set; + +/** + * Runs the high-level steps needed to create ingest configs for the specified log file. In order: + * 1. Determine the most likely character set (UTF-8, UTF-16LE, ISO-8859-2, etc.) + * 2. Load a sample of the file, consisting of the first 1000 lines of the file + * 3. Determine the most likely file structure - one of ND-JSON, XML, CSV, TSV or semi-structured text + * 4. Create an appropriate structure object and delegate writing configs to it + */ +public final class LogStructureFinderManager { + + public static final int MIN_SAMPLE_LINE_COUNT = 2; + + static final Set FILEBEAT_SUPPORTED_ENCODINGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList( + "866", "ansi_x3.4-1968", "arabic", "ascii", "asmo-708", "big5", "big5-hkscs", "chinese", "cn-big5", "cp1250", "cp1251", "cp1252", + "cp1253", "cp1254", "cp1255", "cp1256", "cp1257", "cp1258", "cp819", "cp866", "csbig5", "cseuckr", "cseucpkdfmtjapanese", + "csgb2312", "csibm866", "csiso2022jp", "csiso2022kr", "csiso58gb231280", "csiso88596e", "csiso88596i", "csiso88598e", "csiso88598i", + "csisolatin1", "csisolatin2", "csisolatin3", "csisolatin4", "csisolatin5", "csisolatin6", "csisolatin9", "csisolatinarabic", + "csisolatincyrillic", "csisolatingreek", "csisolatinhebrew", "cskoi8r", "csksc56011987", "csmacintosh", "csshiftjis", "cyrillic", + "dos-874", "ecma-114", "ecma-118", "elot_928", "euc-jp", "euc-kr", "gb18030", "gb2312", "gb_2312", "gb_2312-80", "gbk", "greek", + "greek8", "hebrew", "hz-gb-2312", "ibm819", "ibm866", "iso-2022-cn", "iso-2022-cn-ext", "iso-2022-jp", "iso-2022-kr", "iso-8859-1", + "iso-8859-10", "iso-8859-11", "iso-8859-13", "iso-8859-14", "iso-8859-15", "iso-8859-16", "iso-8859-2", "iso-8859-3", "iso-8859-4", + "iso-8859-5", "iso-8859-6", "iso-8859-6-e", "iso-8859-6-i", "iso-8859-7", "iso-8859-8", "iso-8859-8-e", "iso-8859-8-i", + "iso-8859-9", "iso-ir-100", "iso-ir-101", "iso-ir-109", "iso-ir-110", "iso-ir-126", "iso-ir-127", "iso-ir-138", "iso-ir-144", + "iso-ir-148", "iso-ir-149", "iso-ir-157", "iso-ir-58", "iso8859-1", "iso8859-10", "iso8859-11", "iso8859-13", "iso8859-14", + "iso8859-15", "iso8859-2", "iso8859-3", "iso8859-4", "iso8859-5", "iso8859-6", "iso8859-6e", "iso8859-6i", "iso8859-7", "iso8859-8", + "iso8859-8e", "iso8859-8i", "iso8859-9", "iso88591", "iso885910", "iso885911", "iso885913", "iso885914", "iso885915", "iso88592", + "iso88593", "iso88594", "iso88595", "iso88596", "iso88597", "iso88598", "iso88599", "iso_8859-1", "iso_8859-15", "iso_8859-1:1987", + "iso_8859-2", "iso_8859-2:1987", "iso_8859-3", "iso_8859-3:1988", "iso_8859-4", "iso_8859-4:1988", "iso_8859-5", "iso_8859-5:1988", + "iso_8859-6", "iso_8859-6:1987", "iso_8859-7", "iso_8859-7:1987", "iso_8859-8", "iso_8859-8:1988", "iso_8859-9", "iso_8859-9:1989", + "koi", "koi8", "koi8-r", "koi8-ru", "koi8-u", "koi8_r", "korean", "ks_c_5601-1987", "ks_c_5601-1989", "ksc5601", "ksc_5601", "l1", + "l2", "l3", 
"l4", "l5", "l6", "l9", "latin1", "latin2", "latin3", "latin4", "latin5", "latin6", "logical", "mac", "macintosh", + "ms932", "ms_kanji", "shift-jis", "shift_jis", "sjis", "sun_eu_greek", "tis-620", "unicode-1-1-utf-8", "us-ascii", "utf-16", + "utf-16-bom", "utf-16be", "utf-16be-bom", "utf-16le", "utf-16le-bom", "utf-8", "utf8", "visual", "windows-1250", "windows-1251", + "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258", "windows-31j", + "windows-874", "windows-949", "x-cp1250", "x-cp1251", "x-cp1252", "x-cp1253", "x-cp1254", "x-cp1255", "x-cp1256", "x-cp1257", + "x-cp1258", "x-euc-jp", "x-gbk", "x-mac-cyrillic", "x-mac-roman", "x-mac-ukrainian", "x-sjis", "x-x-big5" + ))); + + /** + * These need to be ordered so that the more generic formats come after the more specific ones + */ + private static final List ORDERED_STRUCTURE_FACTORIES = Collections.unmodifiableList(Arrays.asList( + new JsonLogStructureFinderFactory(), + new XmlLogStructureFinderFactory(), + // ND-JSON will often also be valid (although utterly weird) CSV, so JSON must come before CSV + new CsvLogStructureFinderFactory(), + new TsvLogStructureFinderFactory(), + new SemiColonSeparatedValuesLogStructureFinderFactory(), + new PipeSeparatedValuesLogStructureFinderFactory(), + new TextLogStructureFinderFactory() + )); + + private static final int BUFFER_SIZE = 8192; + + /** + * Given a stream of data from some log file, determine its structure. + * @param idealSampleLineCount Ideally, how many lines from the stream will be read to determine the structure? + * If the stream has fewer lines then an attempt will still be made, providing at + * least {@link #MIN_SAMPLE_LINE_COUNT} lines can be read. + * @param fromFile A stream from which the sample will be read. + * @return A {@link LogStructureFinder} object from which the structure and messages can be queried. + * @throws Exception A variety of problems could occur at various stages of the structure finding process. 
+ */ + public LogStructureFinder findLogStructure(int idealSampleLineCount, InputStream fromFile) throws Exception { + return findLogStructure(new ArrayList<>(), idealSampleLineCount, fromFile); + } + + public LogStructureFinder findLogStructure(List explanation, int idealSampleLineCount, InputStream fromFile) + throws Exception { + + CharsetMatch charsetMatch = findCharset(explanation, fromFile); + String charsetName = charsetMatch.getName(); + + Tuple sampleInfo = sampleFile(charsetMatch.getReader(), charsetName, MIN_SAMPLE_LINE_COUNT, + Math.max(MIN_SAMPLE_LINE_COUNT, idealSampleLineCount)); + + return makeBestStructureFinder(explanation, sampleInfo.v1(), charsetName, sampleInfo.v2()); + } + + CharsetMatch findCharset(List explanation, InputStream inputStream) throws Exception { + + // We need an input stream that supports mark and reset, so wrap the argument + // in a BufferedInputStream if it doesn't already support this feature + if (inputStream.markSupported() == false) { + inputStream = new BufferedInputStream(inputStream, BUFFER_SIZE); + } + + // This is from ICU4J + CharsetDetector charsetDetector = new CharsetDetector().setText(inputStream); + CharsetMatch[] charsetMatches = charsetDetector.detectAll(); + + // Determine some extra characteristics of the input to compensate for some deficiencies of ICU4J + boolean pureAscii = true; + boolean containsZeroBytes = false; + inputStream.mark(BUFFER_SIZE); + byte[] workspace = new byte[BUFFER_SIZE]; + int remainingLength = BUFFER_SIZE; + do { + int bytesRead = inputStream.read(workspace, 0, remainingLength); + if (bytesRead <= 0) { + break; + } + for (int i = 0; i < bytesRead && containsZeroBytes == false; ++i) { + if (workspace[i] == 0) { + containsZeroBytes = true; + pureAscii = false; + } else { + pureAscii = pureAscii && workspace[i] > 0 && workspace[i] < 128; + } + } + remainingLength -= bytesRead; + } while (containsZeroBytes == false && remainingLength > 0); + inputStream.reset(); + + if (pureAscii) { + // If the input is pure ASCII then many single byte character sets will match. We want to favour + // UTF-8 in this case, as it avoids putting a bold declaration of a dubious character set choice + // in the config files. + Optional utf8CharsetMatch = Arrays.stream(charsetMatches) + .filter(charsetMatch -> StandardCharsets.UTF_8.name().equals(charsetMatch.getName())).findFirst(); + if (utf8CharsetMatch.isPresent()) { + explanation.add("Using character encoding [" + StandardCharsets.UTF_8.name() + + "], which matched the input with [" + utf8CharsetMatch.get().getConfidence() + "%] confidence - first [" + + (BUFFER_SIZE / 1024) + "kB] of input was pure ASCII"); + return utf8CharsetMatch.get(); + } + } + + // Input wasn't pure ASCII, so use the best matching character set that's supported by both Java and Go. + // Additionally, if the input contains zero bytes then avoid single byte character sets, as ICU4J will + // suggest these for binary files but then + for (CharsetMatch charsetMatch : charsetMatches) { + String name = charsetMatch.getName(); + if (Charset.isSupported(name) && FILEBEAT_SUPPORTED_ENCODINGS.contains(name.toLowerCase(Locale.ROOT))) { + + // This extra test is to avoid trying to read binary files as text. Running the log config + // deduction algorithms on binary files is very slow as the binary files generally appear to + // have very long lines. 
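+                // For example, UTF-16LE encodes a space as the bytes 0x20 0x00, so for UTF-16 input
+                // zero bytes are expected and must not disqualify the match; a single byte character
+                // set whose encoded space contains no zero byte is rejected below when the input
+                // contains zero bytes.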
+ boolean spaceEncodingContainsZeroByte = false; + byte[] spaceBytes = " ".getBytes(name); + for (int i = 0; i < spaceBytes.length && spaceEncodingContainsZeroByte == false; ++i) { + spaceEncodingContainsZeroByte = (spaceBytes[i] == 0); + } + if (containsZeroBytes && spaceEncodingContainsZeroByte == false) { + explanation.add("Character encoding [" + name + "] matched the input with [" + charsetMatch.getConfidence() + + "%] confidence but was rejected as the input contains zero bytes and the [" + name + "] encoding does not"); + } else { + explanation.add("Using character encoding [" + name + "], which matched the input with [" + + charsetMatch.getConfidence() + "%] confidence"); + return charsetMatch; + } + } else { + explanation.add("Character encoding [" + name + "] matched the input with [" + charsetMatch.getConfidence() + + "%] confidence but was rejected as it is not supported by [" + + (Charset.isSupported(name) ? "Filebeat" : "the JVM") + "]"); + } + } + + throw new IllegalArgumentException("Could not determine a usable character encoding for the input" + + (containsZeroBytes ? " - could it be binary data?" : "")); + } + + LogStructureFinder makeBestStructureFinder(List explanation, String sample, String charsetName, Boolean hasByteOrderMarker) + throws Exception { + + for (LogStructureFinderFactory factory : ORDERED_STRUCTURE_FACTORIES) { + if (factory.canCreateFromSample(explanation, sample)) { + return factory.createFromSample(explanation, sample, charsetName, hasByteOrderMarker); + } + } + throw new IllegalArgumentException("Input did not match any known formats"); + } + + private Tuple sampleFile(Reader reader, String charsetName, int minLines, int maxLines) throws IOException { + + int lineCount = 0; + BufferedReader bufferedReader = new BufferedReader(reader); + StringBuilder sample = new StringBuilder(); + + // Don't include any byte-order-marker in the sample. (The logic to skip it works for both + // UTF-8 and UTF-16 assuming the character set of the reader was correctly detected.) + Boolean hasByteOrderMarker = null; + if (charsetName.toUpperCase(Locale.ROOT).startsWith("UTF")) { + int maybeByteOrderMarker = reader.read(); + hasByteOrderMarker = ((char) maybeByteOrderMarker == '\uFEFF'); + if (maybeByteOrderMarker >= 0 && hasByteOrderMarker == false && (char) maybeByteOrderMarker != '\r') + { + sample.appendCodePoint(maybeByteOrderMarker); + if ((char) maybeByteOrderMarker == '\n') { + ++lineCount; + } + } + } + + String line; + while ((line = bufferedReader.readLine()) != null && ++lineCount <= maxLines) { + sample.append(line).append('\n'); + } + + if (lineCount < minLines) { + throw new IllegalArgumentException("Input contained too few lines to sample"); + } + + return new Tuple<>(sample.toString(), hasByteOrderMarker); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtils.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtils.java new file mode 100644 index 0000000000000..b1dfee22ee64a --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtils.java @@ -0,0 +1,238 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.grok.Grok; +import org.elasticsearch.xpack.ml.logstructurefinder.TimestampFormatFinder.TimestampMatch; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +final class LogStructureUtils { + + static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp"; + static final String MAPPING_TYPE_SETTING = "type"; + static final String MAPPING_FORMAT_SETTING = "format"; + static final String MAPPING_PROPERTIES_SETTING = "properties"; + + // NUMBER Grok pattern doesn't support scientific notation, so we extend it + private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$"); + private static final Grok IP_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{IP}$"); + private static final int KEYWORD_MAX_LEN = 256; + private static final int KEYWORD_MAX_SPACES = 5; + + private LogStructureUtils() { + } + + /** + * Given one or more sample records, find a timestamp field that is consistently present in them all. + * To be returned the timestamp field: + * - Must exist in every record + * - Must have the same timestamp format in every record + * If multiple fields meet these criteria then the one that occurred first in the first sample record + * is chosen. + * @param explanation List of reasons for choosing the overall log structure. This list + * may be non-empty when the method is called, and this method may + * append to it. + * @param sampleRecords List of records derived from the provided log sample. + * @return A tuple of (field name, timestamp format) if one can be found, or null if + * there is no consistent timestamp. 
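+     *         For example (hypothetical records): given samples { "time" : "2018-05-17T13:41:23Z", "msg" : "a" }
+     *         and { "time" : "2018-05-17T13:42:09Z", "msg" : "b" }, the field "time" would be chosen, as it is
+     *         the only field that parses as a timestamp, with the same format, in both records.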
+     */
+    static Tuple<String, TimestampMatch> guessTimestampField(List<String> explanation, List<Map<String, ?>> sampleRecords) {
+        if (sampleRecords.isEmpty()) {
+            return null;
+        }
+
+        // Accept the first match from the first sample that is compatible with all the other samples
+        for (Tuple<String, TimestampMatch> candidate : findCandidates(explanation, sampleRecords)) {
+
+            boolean allGood = true;
+            for (Map<String, ?> sampleRecord : sampleRecords.subList(1, sampleRecords.size())) {
+                Object fieldValue = sampleRecord.get(candidate.v1());
+                if (fieldValue == null) {
+                    explanation.add("First sample match [" + candidate.v1() + "] ruled out because record [" + sampleRecord +
+                        "] doesn't have the field");
+                    allGood = false;
+                    break;
+                }
+
+                TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(fieldValue.toString());
+                if (match == null || match.candidateIndex != candidate.v2().candidateIndex) {
+                    explanation.add("First sample match [" + candidate.v1() + "] ruled out because record [" + sampleRecord +
+                        "] matches differently: [" + match + "]");
+                    allGood = false;
+                    break;
+                }
+            }
+
+            if (allGood) {
+                explanation.add("Guessing timestamp field is [" + candidate.v1() + "] with format [" + candidate.v2() + "]");
+                return candidate;
+            }
+        }
+
+        return null;
+    }
+
+    private static List<Tuple<String, TimestampMatch>> findCandidates(List<String> explanation, List<Map<String, ?>> sampleRecords) {
+
+        List<Tuple<String, TimestampMatch>> candidates = new ArrayList<>();
+
+        // Get candidate timestamps from the first sample record
+        for (Map.Entry<String, ?> entry : sampleRecords.get(0).entrySet()) {
+            Object value = entry.getValue();
+            if (value != null) {
+                TimestampMatch match = TimestampFormatFinder.findFirstFullMatch(value.toString());
+                if (match != null) {
+                    Tuple<String, TimestampMatch> candidate = new Tuple<>(entry.getKey(), match);
+                    candidates.add(candidate);
+                    explanation.add("First sample timestamp match [" + candidate + "]");
+                }
+            }
+        }
+
+        return candidates;
+    }
+
+    /**
+     * Given the sampled records, guess appropriate Elasticsearch mappings.
+     * @param sampleRecords The sampled records.
+     * @return A map of field name to mapping settings.
+     */
+    static SortedMap<String, Object> guessMappings(List<String> explanation, List<Map<String, ?>> sampleRecords) {
+
+        SortedMap<String, Object> mappings = new TreeMap<>();
+
+        for (Map<String, ?> sampleRecord : sampleRecords) {
+            for (String fieldName : sampleRecord.keySet()) {
+                mappings.computeIfAbsent(fieldName, key -> guessMapping(explanation, fieldName,
+                    sampleRecords.stream().flatMap(record -> {
+                            Object fieldValue = record.get(fieldName);
+                            return (fieldValue == null) ? Stream.empty() : Stream.of(fieldValue);
+                        }
+                    ).collect(Collectors.toList())));
+            }
+        }
+
+        return mappings;
+    }
+
+    static Map<String, String> guessMapping(List<String> explanation, String fieldName, List<Object> fieldValues) {
+
+        if (fieldValues == null || fieldValues.isEmpty()) {
+            // We can get here if all the records that contained a given field had a null value for it.
+            // In this case it's best not to make any statement about what the mapping type should be.
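+            // For example (hypothetical records): with { "a" : 1, "b" : null } and { "a" : 2 },
+            // no non-null value is ever seen for "b", so no mapping is guessed for it.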
+            return null;
+        }
+
+        if (fieldValues.stream().anyMatch(value -> value instanceof Map)) {
+            if (fieldValues.stream().allMatch(value -> value instanceof Map)) {
+                return Collections.singletonMap(MAPPING_TYPE_SETTING, "object");
+            }
+            throw new IllegalArgumentException("Field [" + fieldName +
+                "] has both object and non-object values - this is not supported by Elasticsearch");
+        }
+
+        if (fieldValues.stream().anyMatch(value -> value instanceof List || value instanceof Object[])) {
+            // Elasticsearch fields can be either arrays or single values, but array values must all have the same type
+            return guessMapping(explanation, fieldName,
+                fieldValues.stream().flatMap(LogStructureUtils::flatten).collect(Collectors.toList()));
+        }
+
+        return guessScalarMapping(explanation, fieldName, fieldValues.stream().map(Object::toString).collect(Collectors.toList()));
+    }
+
+    private static Stream<Object> flatten(Object value) {
+        if (value instanceof List) {
+            @SuppressWarnings("unchecked")
+            List<Object> objectList = (List<Object>) value;
+            return objectList.stream();
+        } else if (value instanceof Object[]) {
+            return Arrays.stream((Object[]) value);
+        } else {
+            return Stream.of(value);
+        }
+    }
+
+    /**
+     * Given some sample values for a field, guess the most appropriate index mapping for the
+     * field.
+     * @param explanation List of reasons for choosing the overall log structure.  This list
+     *                    may be non-empty when the method is called, and this method may
+     *                    append to it.
+     * @param fieldName   Name of the field for which mappings are to be guessed.
+     * @param fieldValues Values of the field for which mappings are to be guessed.  The guessed
+     *                    mapping will be compatible with all the provided values.  Must not be
+     *                    empty.
+     * @return The sub-section of the index mappings most appropriate for the field,
+     *         for example { "type" : "keyword" }.
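+     *         For example (hypothetical values): ["true", "false"] implies { "type" : "boolean" },
+     *         ["123", "-7"] implies { "type" : "long" }, ["3.14", "2"] implies { "type" : "double" },
+     *         and ["10.0.0.1"] implies { "type" : "ip" }.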
+     */
+    static Map<String, String> guessScalarMapping(List<String> explanation, String fieldName, Collection<String> fieldValues) {
+
+        assert fieldValues.isEmpty() == false;
+
+        if (fieldValues.stream().allMatch(value -> "true".equals(value) || "false".equals(value))) {
+            return Collections.singletonMap(MAPPING_TYPE_SETTING, "boolean");
+        }
+
+        // This checks if a date mapping would be appropriate, and, if so, finds the correct format
+        Iterator<String> iter = fieldValues.iterator();
+        TimestampMatch timestampMatch = TimestampFormatFinder.findFirstFullMatch(iter.next());
+        while (timestampMatch != null && iter.hasNext()) {
+            // To be mapped as type date all the values must match the same date format - it is
+            // not acceptable for all values to be dates, but with different formats
+            if (timestampMatch.equals(TimestampFormatFinder.findFirstFullMatch(iter.next(), timestampMatch.candidateIndex)) == false) {
+                timestampMatch = null;
+            }
+        }
+        if (timestampMatch != null) {
+            return timestampMatch.getEsDateMappingTypeWithFormat();
+        }
+
+        if (fieldValues.stream().allMatch(NUMBER_GROK::match)) {
+            try {
+                fieldValues.forEach(Long::parseLong);
+                return Collections.singletonMap(MAPPING_TYPE_SETTING, "long");
+            } catch (NumberFormatException e) {
+                explanation.add("Rejecting type 'long' for field [" + fieldName + "] due to parse failure: [" + e.getMessage() + "]");
+            }
+            try {
+                fieldValues.forEach(Double::parseDouble);
+                return Collections.singletonMap(MAPPING_TYPE_SETTING, "double");
+            } catch (NumberFormatException e) {
+                explanation.add("Rejecting type 'double' for field [" + fieldName + "] due to parse failure: [" + e.getMessage() + "]");
+            }
+        } else if (fieldValues.stream().allMatch(IP_GROK::match)) {
+            return Collections.singletonMap(MAPPING_TYPE_SETTING, "ip");
+        }
+
+        if (fieldValues.stream().anyMatch(LogStructureUtils::isMoreLikelyTextThanKeyword)) {
+            return Collections.singletonMap(MAPPING_TYPE_SETTING, "text");
+        }
+
+        return Collections.singletonMap(MAPPING_TYPE_SETTING, "keyword");
+    }
+
+    /**
+     * The thinking is that the longer the field value and the more spaces it contains,
+     * the more likely it is that it should be indexed as text rather than keyword.
+     */
+    static boolean isMoreLikelyTextThanKeyword(String str) {
+        int length = str.length();
+        return length > KEYWORD_MAX_LEN || length - str.replaceAll("\\s", "").length() > KEYWORD_MAX_SPACES;
+    }
+}
diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/PipeSeparatedValuesLogStructureFinderFactory.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/PipeSeparatedValuesLogStructureFinderFactory.java
new file mode 100644
index 0000000000000..085599de847f0
--- /dev/null
+++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/PipeSeparatedValuesLogStructureFinderFactory.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.supercsv.prefs.CsvPreference; + +import java.io.IOException; +import java.util.List; + +public class PipeSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory { + + private static final CsvPreference PIPE_PREFERENCE = new CsvPreference.Builder('"', '|', "\n").build(); + + /** + * Rules are: + * - The file must be valid pipe (|) separated values + * - It must contain at least two complete records + * - There must be at least five fields per record (otherwise files with coincidental + * or no pipe characters could be treated as pipe separated) + * - Every pipe separated value record except the last must have the same number of fields + * The reason the last record is allowed to have fewer fields than the others is that + * it could have been truncated when the file was sampled. + */ + @Override + public boolean canCreateFromSample(List explanation, String sample) { + return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 5, PIPE_PREFERENCE, "pipe separated values"); + } + + @Override + public LogStructureFinder createFromSample(List explanation, String sample, String charsetName, Boolean hasByteOrderMarker) + throws IOException { + return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, + PIPE_PREFERENCE, true); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/SemiColonSeparatedValuesLogStructureFinderFactory.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/SemiColonSeparatedValuesLogStructureFinderFactory.java new file mode 100644 index 0000000000000..e0e80fa7465ba --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/SemiColonSeparatedValuesLogStructureFinderFactory.java @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.supercsv.prefs.CsvPreference; + +import java.io.IOException; +import java.util.List; + +public class SemiColonSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory { + + /** + * Rules are: + * - The file must be valid semi-colon separated values + * - It must contain at least two complete records + * - There must be at least four fields per record (otherwise files with coincidental + * or no semi-colons could be treated as semi-colon separated) + * - Every semi-colon separated value record except the last must have the same number of fields + * The reason the last record is allowed to have fewer fields than the others is that + * it could have been truncated when the file was sampled. 
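+     * For example (hypothetical sample): the two lines "a;b;c;d" and "1;2;3;4" satisfy all of these
+     * rules, whereas the two lines "a;b" and "1;2" would be rejected for having fewer than four
+     * fields per record.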
+     */
+    @Override
+    public boolean canCreateFromSample(List<String> explanation, String sample) {
+        return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 4,
+            CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, "semi-colon separated values");
+    }
+
+    @Override
+    public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
+        throws IOException {
+        return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName,
+            hasByteOrderMarker, CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, false);
+    }
+}
diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/SeparatedValuesLogStructureFinder.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/SeparatedValuesLogStructureFinder.java
new file mode 100644
index 0000000000000..fd9d34096b2ed
--- /dev/null
+++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/SeparatedValuesLogStructureFinder.java
@@ -0,0 +1,486 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.logstructurefinder;
+
+import org.elasticsearch.common.collect.Tuple;
+import org.elasticsearch.xpack.ml.logstructurefinder.TimestampFormatFinder.TimestampMatch;
+import org.supercsv.exception.SuperCsvException;
+import org.supercsv.io.CsvListReader;
+import org.supercsv.prefs.CsvPreference;
+import org.supercsv.util.Util;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.DoubleSummaryStatistics;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Random;
+import java.util.SortedMap;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
+
+    private static final int MAX_LEVENSHTEIN_COMPARISONS = 100;
+
+    private final List<String> sampleMessages;
+    private final LogStructure structure;
+
+    static SeparatedValuesLogStructureFinder makeSeparatedValuesLogStructureFinder(List<String> explanation, String sample,
+                                                                                   String charsetName, Boolean hasByteOrderMarker,
+                                                                                   CsvPreference csvPreference, boolean trimFields)
+        throws IOException {
+
+        Tuple<List<List<String>>, List<Integer>> parsed = readRows(sample, csvPreference);
+        List<List<String>> rows = parsed.v1();
+        List<Integer> lineNumbers = parsed.v2();
+
+        Tuple<Boolean, String[]> headerInfo = findHeaderFromSample(explanation, rows);
+        boolean isHeaderInFile = headerInfo.v1();
+        String[] header = headerInfo.v2();
+        String[] headerWithNamedBlanks = new String[header.length];
+        for (int i = 0; i < header.length; ++i) {
+            String rawHeader = header[i].isEmpty() ? "column" + (i + 1) : header[i];
+            headerWithNamedBlanks[i] = trimFields ? rawHeader.trim() : rawHeader;
+        }
+
+        List<String> sampleLines = Arrays.asList(sample.split("\n"));
+        List<String> sampleMessages = new ArrayList<>();
+        List<Map<String, ?>> sampleRecords = new ArrayList<>();
+        int prevMessageEndLineNumber = isHeaderInFile ? lineNumbers.get(0) : -1;
+        for (int index = isHeaderInFile ?
1 : 0; index < rows.size(); ++index) { + List row = rows.get(index); + int lineNumber = lineNumbers.get(index); + Map sampleRecord = new LinkedHashMap<>(); + Util.filterListToMap(sampleRecord, headerWithNamedBlanks, + trimFields ? row.stream().map(String::trim).collect(Collectors.toList()) : row); + sampleRecords.add(sampleRecord); + sampleMessages.add( + sampleLines.subList(prevMessageEndLineNumber + 1, lineNumbers.get(index)).stream().collect(Collectors.joining("\n"))); + prevMessageEndLineNumber = lineNumber; + } + + String preamble = Pattern.compile("\n").splitAsStream(sample).limit(lineNumbers.get(1)).collect(Collectors.joining("\n", "", "\n")); + + char delimiter = (char) csvPreference.getDelimiterChar(); + LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.fromSeparator(delimiter)) + .setCharset(charsetName) + .setHasByteOrderMarker(hasByteOrderMarker) + .setSampleStart(preamble) + .setNumLinesAnalyzed(lineNumbers.get(lineNumbers.size() - 1)) + .setNumMessagesAnalyzed(sampleRecords.size()) + .setHasHeaderRow(isHeaderInFile) + .setInputFields(Arrays.stream(headerWithNamedBlanks).collect(Collectors.toList())); + + if (trimFields) { + structureBuilder.setShouldTrimFields(true); + } + + Tuple timeField = LogStructureUtils.guessTimestampField(explanation, sampleRecords); + if (timeField != null) { + String timeLineRegex = null; + StringBuilder builder = new StringBuilder("^"); + // We make the assumption that the timestamp will be on the first line of each record. Therefore, if the + // timestamp is the last column then either our assumption is wrong (and the approach will completely + // break down) or else every record is on a single line and there's no point creating a multiline config. + // This is why the loop excludes the last column. + for (String column : Arrays.asList(header).subList(0, header.length - 1)) { + if (timeField.v1().equals(column)) { + builder.append("\"?"); + String simpleTimePattern = timeField.v2().simplePattern.pattern(); + builder.append(simpleTimePattern.startsWith("\\b") ? simpleTimePattern.substring(2) : simpleTimePattern); + timeLineRegex = builder.toString(); + break; + } else { + builder.append(".*?"); + if (delimiter == '\t') { + builder.append("\\t"); + } else { + builder.append(delimiter); + } + } + } + + if (isHeaderInFile) { + structureBuilder.setExcludeLinesPattern("^" + Arrays.stream(header) + .map(column -> "\"?" 
+ column.replace("\"", "\"\"").replaceAll("([\\\\|()\\[\\]{}^$*?])", "\\\\$1") + "\"?") + .collect(Collectors.joining(","))); + } + + structureBuilder.setTimestampField(timeField.v1()) + .setTimestampFormats(timeField.v2().dateFormats) + .setNeedClientTimezone(timeField.v2().hasTimezoneDependentParsing()) + .setMultilineStartPattern(timeLineRegex); + } + + SortedMap mappings = LogStructureUtils.guessMappings(explanation, sampleRecords); + mappings.put(LogStructureUtils.DEFAULT_TIMESTAMP_FIELD, Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "date")); + + LogStructure structure = structureBuilder + .setMappings(mappings) + .setExplanation(explanation) + .build(); + + return new SeparatedValuesLogStructureFinder(sampleMessages, structure); + } + + private SeparatedValuesLogStructureFinder(List sampleMessages, LogStructure structure) { + this.sampleMessages = Collections.unmodifiableList(sampleMessages); + this.structure = structure; + } + + @Override + public List getSampleMessages() { + return sampleMessages; + } + + @Override + public LogStructure getStructure() { + return structure; + } + + static Tuple>, List> readRows(String sample, CsvPreference csvPreference) throws IOException { + + int fieldsInFirstRow = -1; + + List> rows = new ArrayList<>(); + List lineNumbers = new ArrayList<>(); + + try (CsvListReader csvReader = new CsvListReader(new StringReader(sample), csvPreference)) { + + try { + List row; + while ((row = csvReader.read()) != null) { + if (fieldsInFirstRow < 0) { + fieldsInFirstRow = row.size(); + } else { + // Tolerate extra columns if and only if they're empty + while (row.size() > fieldsInFirstRow && row.get(row.size() - 1) == null) { + row.remove(row.size() - 1); + } + } + rows.add(row); + lineNumbers.add(csvReader.getLineNumber()); + } + } catch (SuperCsvException e) { + // Tolerate an incomplete last row + if (notUnexpectedEndOfFile(e)) { + throw e; + } + } + } + + assert rows.isEmpty() == false; + assert lineNumbers.size() == rows.size(); + + if (rows.get(0).size() != rows.get(rows.size() - 1).size()) { + rows.remove(rows.size() - 1); + lineNumbers.remove(lineNumbers.size() - 1); + } + + // This should have been enforced by canCreateFromSample() + assert rows.size() > 1; + + return new Tuple<>(rows, lineNumbers); + } + + static Tuple findHeaderFromSample(List explanation, List> rows) { + + assert rows.isEmpty() == false; + + List firstRow = rows.get(0); + + boolean isHeaderInFile = true; + if (rowContainsDuplicateNonEmptyValues(firstRow)) { + isHeaderInFile = false; + explanation.add("First row contains duplicate values, so assuming it's not a header"); + } else { + if (rows.size() < 3) { + explanation.add("Too little data to accurately assess whether header is in sample - guessing it is"); + } else { + isHeaderInFile = isFirstRowUnusual(explanation, rows); + } + } + + if (isHeaderInFile) { + // SuperCSV will put nulls in the header if any columns don't have names, but empty strings are better for us + return new Tuple<>(true, firstRow.stream().map(field -> (field == null) ? 
"" : field).toArray(String[]::new)); + } else { + return new Tuple<>(false, IntStream.rangeClosed(1, firstRow.size()).mapToObj(num -> "column" + num).toArray(String[]::new)); + } + } + + static boolean rowContainsDuplicateNonEmptyValues(List row) { + + HashSet values = new HashSet<>(); + + for (String value : row) { + if (value != null && value.isEmpty() == false && values.add(value) == false) { + return true; + } + } + + return false; + } + + private static boolean isFirstRowUnusual(List explanation, List> rows) { + + assert rows.size() >= 3; + + List firstRow = rows.get(0); + String firstRowStr = firstRow.stream().map(field -> (field == null) ? "" : field).collect(Collectors.joining("")); + List> otherRows = rows.subList(1, rows.size()); + List otherRowStrs = new ArrayList<>(); + for (List row : otherRows) { + otherRowStrs.add(row.stream().map(str -> (str == null) ? "" : str).collect(Collectors.joining(""))); + } + + // Check lengths + + double firstRowLength = firstRowStr.length(); + DoubleSummaryStatistics otherRowStats = otherRowStrs.stream().mapToDouble(otherRow -> (double) otherRow.length()) + .collect(DoubleSummaryStatistics::new, DoubleSummaryStatistics::accept, DoubleSummaryStatistics::combine); + + double otherLengthRange = otherRowStats.getMax() - otherRowStats.getMin(); + if (firstRowLength < otherRowStats.getMin() - otherLengthRange / 10.0 || + firstRowLength > otherRowStats.getMax() + otherLengthRange / 10.0) { + explanation.add("First row is unusual based on length test: [" + firstRowLength + "] and [" + + toNiceString(otherRowStats) + "]"); + return true; + } + + explanation.add("First row is not unusual based on length test: [" + firstRowLength + "] and [" + + toNiceString(otherRowStats) + "]"); + + // Check edit distances + + DoubleSummaryStatistics firstRowStats = otherRows.stream().limit(MAX_LEVENSHTEIN_COMPARISONS) + .mapToDouble(otherRow -> (double) levenshteinFieldwiseCompareRows(firstRow, otherRow)) + .collect(DoubleSummaryStatistics::new, DoubleSummaryStatistics::accept, DoubleSummaryStatistics::combine); + + otherRowStats = new DoubleSummaryStatistics(); + int numComparisons = 0; + int proportion = otherRowStrs.size() / MAX_LEVENSHTEIN_COMPARISONS; + int innerIncrement = 1 + proportion * proportion; + Random random = new Random(firstRow.hashCode()); + for (int i = 0; numComparisons < MAX_LEVENSHTEIN_COMPARISONS && i < otherRowStrs.size(); ++i) { + for (int j = i + 1 + random.nextInt(innerIncrement); numComparisons < MAX_LEVENSHTEIN_COMPARISONS && j < otherRowStrs.size(); + j += innerIncrement) { + otherRowStats.accept((double) levenshteinFieldwiseCompareRows(otherRows.get(i), otherRows.get(j))); + ++numComparisons; + } + } + + if (firstRowStats.getAverage() > otherRowStats.getAverage() * 1.2) { + explanation.add("First row is unusual based on Levenshtein test [" + toNiceString(firstRowStats) + + "] and [" + toNiceString(otherRowStats) + "]"); + return true; + } + + explanation.add("First row is not unusual based on Levenshtein test [" + toNiceString(firstRowStats) + + "] and [" + toNiceString(otherRowStats) + "]"); + + return false; + } + + private static String toNiceString(DoubleSummaryStatistics stats) { + return String.format(Locale.ROOT, "count=%d, min=%f, average=%f, max=%f", stats.getCount(), stats.getMin(), stats.getAverage(), + stats.getMax()); + } + + /** + * Sum of the Levenshtein distances between corresponding elements + * in the two supplied lists _excluding_ the biggest difference. 
+ * The reason the biggest difference is excluded is that sometimes + * there's a "message" field that is much longer than any of the other + * fields, varies enormously between rows, and skews the comparison. + */ + static int levenshteinFieldwiseCompareRows(List firstRow, List secondRow) { + + int largestSize = Math.max(firstRow.size(), secondRow.size()); + if (largestSize <= 1) { + return 0; + } + + int[] distances = new int[largestSize]; + + for (int index = 0; index < largestSize; ++index) { + distances[index] = levenshteinDistance((index < firstRow.size()) ? firstRow.get(index) : "", + (index < secondRow.size()) ? secondRow.get(index) : ""); + } + + Arrays.sort(distances); + + return IntStream.of(distances).limit(distances.length - 1).sum(); + } + + /** + * This method implements the simple algorithm for calculating Levenshtein distance. + */ + static int levenshteinDistance(String first, String second) { + + // There are some examples with pretty pictures of the matrix on Wikipedia here: + // http://en.wikipedia.org/wiki/Levenshtein_distance + + int firstLen = (first == null) ? 0 : first.length(); + int secondLen = (second == null) ? 0 : second.length(); + if (firstLen == 0) { + return secondLen; + } + if (secondLen == 0) { + return firstLen; + } + + int[] currentCol = new int[secondLen + 1]; + int[] prevCol = new int[secondLen + 1]; + + // Populate the left column + for (int down = 0; down <= secondLen; ++down) { + currentCol[down] = down; + } + + // Calculate the other entries in the matrix + for (int across = 1; across <= firstLen; ++across) { + int[] tmp = prevCol; + prevCol = currentCol; + // We could allocate a new array for currentCol here, but it's more efficient to reuse the one that's now redundant + currentCol = tmp; + + currentCol[0] = across; + + for (int down = 1; down <= secondLen; ++down) { + + // Do the strings differ at the point we've reached? 
+ if (first.charAt(across - 1) == second.charAt(down - 1)) { + + // No, they're the same => no extra cost + currentCol[down] = prevCol[down - 1]; + } else { + // Yes, they differ, so there are 3 options: + + // 1) Deletion => cell to the left's value plus 1 + int option1 = prevCol[down]; + + // 2) Insertion => cell above's value plus 1 + int option2 = currentCol[down - 1]; + + // 3) Substitution => cell above left's value plus 1 + int option3 = prevCol[down - 1]; + + // Take the cheapest option of the 3 + currentCol[down] = Math.min(Math.min(option1, option2), option3) + 1; + } + } + } + + // Result is the value in the bottom right hand corner of the matrix + return currentCol[secondLen]; + } + + static boolean lineHasUnescapedQuote(String line, CsvPreference csvPreference) { + char quote = csvPreference.getQuoteChar(); + String lineWithEscapedQuotesRemoved = line.replace(String.valueOf(quote) + quote, ""); + for (int index = 1; index < lineWithEscapedQuotesRemoved.length() - 1; ++index) { + if (lineWithEscapedQuotesRemoved.charAt(index) == quote && + lineWithEscapedQuotesRemoved.codePointAt(index - 1) != csvPreference.getDelimiterChar() && + lineWithEscapedQuotesRemoved.codePointAt(index + 1) != csvPreference.getDelimiterChar()) { + return true; + } + } + return false; + } + + static boolean canCreateFromSample(List explanation, String sample, int minFieldsPerRow, CsvPreference csvPreference, + String formatName) { + + // Logstash's CSV parser won't tolerate fields where just part of the + // value is quoted, whereas SuperCSV will, hence this extra check + String[] sampleLines = sample.split("\n"); + for (String sampleLine : sampleLines) { + if (lineHasUnescapedQuote(sampleLine, csvPreference)) { + explanation.add("Not " + formatName + + " because a line has an unescaped quote that is not at the beginning or end of a field: [" + sampleLine + "]"); + return false; + } + } + + try (CsvListReader csvReader = new CsvListReader(new StringReader(sample), csvPreference)) { + + int fieldsInFirstRow = -1; + int fieldsInLastRow = -1; + + int numberOfRows = 0; + try { + List row; + while ((row = csvReader.read()) != null) { + + int fieldsInThisRow = row.size(); + ++numberOfRows; + if (fieldsInFirstRow < 0) { + fieldsInFirstRow = fieldsInThisRow; + if (fieldsInFirstRow < minFieldsPerRow) { + explanation.add("Not " + formatName + " because the first row has fewer than [" + minFieldsPerRow + + "] fields: [" + fieldsInFirstRow + "]"); + return false; + } + fieldsInLastRow = fieldsInFirstRow; + continue; + } + + // Tolerate extra columns if and only if they're empty + while (fieldsInThisRow > fieldsInFirstRow && row.get(fieldsInThisRow - 1) == null) { + --fieldsInThisRow; + } + + if (fieldsInLastRow != fieldsInFirstRow) { + explanation.add("Not " + formatName + " because row [" + (numberOfRows - 1) + + "] has a different number of fields to the first row: [" + fieldsInFirstRow + "] and [" + + fieldsInLastRow + "]"); + return false; + } + + fieldsInLastRow = fieldsInThisRow; + } + + if (fieldsInLastRow > fieldsInFirstRow) { + explanation.add("Not " + formatName + " because last row has more fields than first row: [" + fieldsInFirstRow + + "] and [" + fieldsInLastRow + "]"); + return false; + } + if (fieldsInLastRow < fieldsInFirstRow) { + --numberOfRows; + } + } catch (SuperCsvException e) { + // Tolerate an incomplete last row + if (notUnexpectedEndOfFile(e)) { + explanation.add("Not " + formatName + " because there was a parsing exception: [" + e.getMessage() + "]"); + return false; + } + } + if 
(numberOfRows <= 1) { + explanation.add("Not " + formatName + " because fewer than 2 complete records in sample: [" + numberOfRows + "]"); + return false; + } + explanation.add("Deciding sample is " + formatName); + return true; + + } catch (IOException e) { + explanation.add("Not " + formatName + " because there was a parsing exception: [" + e.getMessage() + "]"); + return false; + } + } + + private static boolean notUnexpectedEndOfFile(SuperCsvException e) { + return e.getMessage().startsWith("unexpected end of file while reading quoted column") == false; + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinder.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinder.java new file mode 100644 index 0000000000000..722751a4cf49e --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinder.java @@ -0,0 +1,201 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.xpack.ml.logstructurefinder.TimestampFormatFinder.TimestampMatch; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.regex.Pattern; + +public class TextLogStructureFinder implements LogStructureFinder { + + private final List sampleMessages; + private final LogStructure structure; + + static TextLogStructureFinder makeTextLogStructureFinder(List explanation, String sample, String charsetName, + Boolean hasByteOrderMarker) { + + String[] sampleLines = sample.split("\n"); + Tuple> bestTimestamp = mostLikelyTimestamp(sampleLines); + if (bestTimestamp == null) { + // Is it appropriate to treat a file that is neither structured nor has + // a regular pattern of timestamps as a log file? Probably not... 
+ throw new IllegalArgumentException("Could not find a timestamp in the log sample provided"); + } + + explanation.add("Most likely timestamp format is [" + bestTimestamp.v1() + "]"); + + List sampleMessages = new ArrayList<>(); + StringBuilder preamble = new StringBuilder(); + int linesConsumed = 0; + StringBuilder message = null; + int linesInMessage = 0; + String multiLineRegex = createMultiLineMessageStartRegex(bestTimestamp.v2(), bestTimestamp.v1().simplePattern.pattern()); + Pattern multiLinePattern = Pattern.compile(multiLineRegex); + for (String sampleLine : sampleLines) { + if (multiLinePattern.matcher(sampleLine).find()) { + if (message != null) { + sampleMessages.add(message.toString()); + linesConsumed += linesInMessage; + } + message = new StringBuilder(sampleLine); + linesInMessage = 1; + } else { + // If message is null here then the sample probably began with the incomplete ending of a previous message + if (message == null) { + // We count lines before the first message as consumed (just like we would + // for the CSV header or lines before the first XML document starts) + ++linesConsumed; + } else { + message.append('\n').append(sampleLine); + ++linesInMessage; + } + } + if (sampleMessages.size() < 2) { + preamble.append(sampleLine).append('\n'); + } + } + // Don't add the last message, as it might be partial and mess up subsequent pattern finding + + LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.SEMI_STRUCTURED_TEXT) + .setCharset(charsetName) + .setHasByteOrderMarker(hasByteOrderMarker) + .setSampleStart(preamble.toString()) + .setNumLinesAnalyzed(linesConsumed) + .setNumMessagesAnalyzed(sampleMessages.size()) + .setMultilineStartPattern(multiLineRegex); + + SortedMap mappings = new TreeMap<>(); + mappings.put("message", Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "text")); + mappings.put(LogStructureUtils.DEFAULT_TIMESTAMP_FIELD, Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "date")); + + // We can't parse directly into @timestamp using Grok, so parse to some other time field, which the date filter will then remove + String interimTimestampField; + String grokPattern; + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings); + Tuple timestampFieldAndFullMatchGrokPattern = grokPatternCreator.findFullLineGrokPattern(); + if (timestampFieldAndFullMatchGrokPattern != null) { + interimTimestampField = timestampFieldAndFullMatchGrokPattern.v1(); + grokPattern = timestampFieldAndFullMatchGrokPattern.v2(); + } else { + interimTimestampField = "timestamp"; + grokPattern = grokPatternCreator.createGrokPatternFromExamples(bestTimestamp.v1().grokPatternName, interimTimestampField); + } + + LogStructure structure = structureBuilder + .setTimestampField(interimTimestampField) + .setTimestampFormats(bestTimestamp.v1().dateFormats) + .setNeedClientTimezone(bestTimestamp.v1().hasTimezoneDependentParsing()) + .setGrokPattern(grokPattern) + .setMappings(mappings) + .setExplanation(explanation) + .build(); + + return new TextLogStructureFinder(sampleMessages, structure); + } + + private TextLogStructureFinder(List sampleMessages, LogStructure structure) { + this.sampleMessages = Collections.unmodifiableList(sampleMessages); + this.structure = structure; + } + + @Override + public List getSampleMessages() { + return sampleMessages; + } + + @Override + public LogStructure getStructure() { + return structure; + } + + static Tuple> mostLikelyTimestamp(String[] 
sampleLines) { + + Map>> timestampMatches = new LinkedHashMap<>(); + + int remainingLines = sampleLines.length; + double differenceBetweenTwoHighestWeights = 0.0; + for (String sampleLine : sampleLines) { + TimestampMatch match = TimestampFormatFinder.findFirstMatch(sampleLine); + if (match != null) { + TimestampMatch pureMatch = new TimestampMatch(match.candidateIndex, "", match.dateFormats, match.simplePattern, + match.grokPatternName, ""); + timestampMatches.compute(pureMatch, (k, v) -> { + if (v == null) { + return new Tuple<>(weightForMatch(match.preface), new HashSet<>(Collections.singletonList(match.preface))); + } else { + v.v2().add(match.preface); + return new Tuple<>(v.v1() + weightForMatch(match.preface), v.v2()); + } + }); + differenceBetweenTwoHighestWeights = findDifferenceBetweenTwoHighestWeights(timestampMatches.values()); + } + // The highest possible weight is 1, so if the difference between the two highest weights + // is less than the number of lines remaining then the leader cannot possibly be overtaken + if (differenceBetweenTwoHighestWeights > --remainingLines) { + break; + } + } + + double highestWeight = 0.0; + Tuple> highestWeightMatch = null; + for (Map.Entry>> entry : timestampMatches.entrySet()) { + double weight = entry.getValue().v1(); + if (weight > highestWeight) { + highestWeight = weight; + highestWeightMatch = new Tuple<>(entry.getKey(), entry.getValue().v2()); + } + } + return highestWeightMatch; + } + + /** + * Used to weight a timestamp match according to how far along the line it is found. + * Timestamps at the very beginning of the line are given a weight of 1. The weight + * progressively decreases the more text there is preceding the timestamp match, but + * is always greater than 0. + * @return A weight in the range (0, 1]. + */ + private static double weightForMatch(String preface) { + return Math.pow(1.0 + preface.length() / 15.0, -1.1); + } + + private static double findDifferenceBetweenTwoHighestWeights(Collection>> timestampMatches) { + double highestWeight = 0.0; + double secondHighestWeight = 0.0; + for (Tuple> timestampMatch : timestampMatches) { + double weight = timestampMatch.v1(); + if (weight > highestWeight) { + secondHighestWeight = highestWeight; + highestWeight = weight; + } else if (weight > secondHighestWeight) { + secondHighestWeight = weight; + } + } + return highestWeight - secondHighestWeight; + } + + static String createMultiLineMessageStartRegex(Collection prefaces, String timestampRegex) { + + StringBuilder builder = new StringBuilder("^"); + GrokPatternCreator.addIntermediateRegex(builder, prefaces); + builder.append(timestampRegex); + if (builder.substring(0, 3).equals("^\\b")) { + builder.delete(1, 3); + } + return builder.toString(); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderFactory.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderFactory.java new file mode 100644 index 0000000000000..d129ba95bd87e --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderFactory.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import java.util.List; +import java.util.regex.Pattern; + +public class TextLogStructureFinderFactory implements LogStructureFinderFactory { + + // This works because, by default, dot doesn't match newlines + private static final Pattern TWO_NON_BLANK_LINES_PATTERN = Pattern.compile(".\n+."); + + /** + * This format matches if the sample contains at least one newline and at least two + * non-blank lines. + */ + @Override + public boolean canCreateFromSample(List explanation, String sample) { + if (sample.indexOf('\n') < 0) { + explanation.add("Not text because sample contains no newlines"); + return false; + } + if (TWO_NON_BLANK_LINES_PATTERN.matcher(sample).find() == false) { + explanation.add("Not text because sample contains fewer than two non-blank lines"); + return false; + } + + explanation.add("Deciding sample is text"); + return true; + } + + @Override + public LogStructureFinder createFromSample(List explanation, String sample, String charsetName, Boolean hasByteOrderMarker) { + return TextLogStructureFinder.makeTextLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TimestampFormatFinder.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TimestampFormatFinder.java new file mode 100644 index 0000000000000..30c94378f9e22 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TimestampFormatFinder.java @@ -0,0 +1,427 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.grok.Grok; + +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Used to find the best timestamp format for one of the following situations: + * 1. Matching an entire field value + * 2. Matching a timestamp found somewhere within a message + */ +public final class TimestampFormatFinder { + + private static final String PREFACE = "preface"; + private static final String EPILOGUE = "epilogue"; + + private static final Pattern FRACTIONAL_SECOND_INTERPRETER = Pattern.compile("([:.,])(\\d{3,9})"); + private static final char DEFAULT_FRACTIONAL_SECOND_SEPARATOR = ','; + + /** + * The timestamp patterns are complex and it can be slow to prove they do not + * match anywhere in a long message. Many of the timestamps are similar and + * will never be found in a string if simpler sub-patterns do not exist in the + * string. These sub-patterns can be used to quickly rule out multiple complex + * patterns. These patterns do not need to represent quantities that are + * useful to know the value of, merely character sequences that can be used to + * prove that several more complex patterns cannot possibly match. 
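+     * For example, unless the cheap sub-pattern \b\d{4}-\d{2}-\d{2} followed by a space matches
+     * somewhere in the text, the far more complex TOMCAT_DATESTAMP candidate below cannot
+     * possibly match, so it can be ruled out without ever running its full pattern.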
+ */ + private static final List QUICK_RULE_OUT_PATTERNS = Arrays.asList( + // YYYY-MM-dd followed by a space + Pattern.compile("\\b\\d{4}-\\d{2}-\\d{2} "), + // The end of some number (likely year or day) followed by a space then HH:mm + Pattern.compile("\\d \\d{2}:\\d{2}\\b"), + // HH:mm:ss surrounded by spaces + Pattern.compile(" \\d{2}:\\d{2}:\\d{2} ") + ); + + /** + * The first match in this list will be chosen, so it needs to be ordered + * such that more generic patterns come after more specific patterns. + */ + static final List ORDERED_CANDIDATE_FORMATS = Arrays.asList( + // The TOMCAT_DATESTAMP format has to come before ISO8601 because it's basically ISO8601 but + // with a space before the timezone, and because the timezone is optional in ISO8601 it will + // be recognised as that with the timezone missed off if ISO8601 is checked first + new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss,SSS Z", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}", + "\\b20\\d{2}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)[:.,][0-9]{3,9} (?:Z|[+-]%{HOUR}%{MINUTE})\\b", + "TOMCAT_DATESTAMP", Arrays.asList(0, 1)), + // The Elasticsearch ISO8601 parser requires a literal T between the date and time, so + // longhand formats are needed if there's a space instead + new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss,SSSZ", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}", + "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)[:.,][0-9]{3,9}(?:Z|[+-]%{HOUR}%{MINUTE})\\b", + "TIMESTAMP_ISO8601", Arrays.asList(0, 1)), + new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss,SSSZZ", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}", + "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)[:.,][0-9]{3,9}[+-]%{HOUR}:%{MINUTE}\\b", + "TIMESTAMP_ISO8601", Arrays.asList(0, 1)), + new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss,SSS", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}", + "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)[:.,][0-9]{3,9}\\b", "TIMESTAMP_ISO8601", + Arrays.asList(0, 1)), + new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ssZ", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", + "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)(?:Z|[+-]%{HOUR}%{MINUTE})\\b", "TIMESTAMP_ISO8601", + Arrays.asList(0, 1)), + new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ssZZ", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", + "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)[+-]%{HOUR}:%{MINUTE}\\b", "TIMESTAMP_ISO8601", + Arrays.asList(0, 1)), + new CandidateTimestampFormat("YYYY-MM-dd HH:mm:ss", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", + "\\b%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}:(?:[0-5][0-9]|60)\\b", "TIMESTAMP_ISO8601", + Arrays.asList(0, 1)), + new CandidateTimestampFormat("ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", "\\b%{TIMESTAMP_ISO8601}\\b", + "TIMESTAMP_ISO8601"), + new CandidateTimestampFormat("EEE MMM dd YYYY HH:mm:ss zzz", + "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{4} \\d{2}:\\d{2}:\\d{2} ", + "\\b%{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) %{TZ}\\b", "DATESTAMP_RFC822", Arrays.asList(1, 2)), + new CandidateTimestampFormat("EEE MMM dd YYYY HH:mm zzz", "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{4} \\d{2}:\\d{2} ", + "\\b%{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{HOUR}:%{MINUTE} %{TZ}\\b", "DATESTAMP_RFC822", Collections.singletonList(1)), + new CandidateTimestampFormat("EEE, dd MMM YYYY HH:mm:ss ZZ", + 
"\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2}:\\d{2} ", + "\\b%{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) (?:Z|[+-]%{HOUR}:%{MINUTE})\\b", + "DATESTAMP_RFC2822", Arrays.asList(1, 2)), + new CandidateTimestampFormat("EEE, dd MMM YYYY HH:mm:ss Z", + "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2}:\\d{2} ", + "\\b%{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) (?:Z|[+-]%{HOUR}%{MINUTE})\\b", + "DATESTAMP_RFC2822", Arrays.asList(1, 2)), + new CandidateTimestampFormat("EEE, dd MMM YYYY HH:mm ZZ", "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2} ", + "\\b%{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{HOUR}:%{MINUTE} (?:Z|[+-]%{HOUR}:%{MINUTE})\\b", "DATESTAMP_RFC2822", + Collections.singletonList(1)), + new CandidateTimestampFormat("EEE, dd MMM YYYY HH:mm Z", "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2} ", + "\\b%{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{HOUR}:%{MINUTE} (?:Z|[+-]%{HOUR}%{MINUTE})\\b", "DATESTAMP_RFC2822", + Collections.singletonList(1)), + new CandidateTimestampFormat("EEE MMM dd HH:mm:ss zzz YYYY", + "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{2}:\\d{2}:\\d{2} [A-Z]{3,4} \\d{4}\\b", + "\\b%{DAY} %{MONTH} %{MONTHDAY} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) %{TZ} %{YEAR}\\b", "DATESTAMP_OTHER", + Arrays.asList(1, 2)), + new CandidateTimestampFormat("EEE MMM dd HH:mm zzz YYYY", + "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{2}:\\d{2} [A-Z]{3,4} \\d{4}\\b", + "\\b%{DAY} %{MONTH} %{MONTHDAY} %{HOUR}:%{MINUTE} %{TZ} %{YEAR}\\b", "DATESTAMP_OTHER", Collections.singletonList(1)), + new CandidateTimestampFormat("YYYYMMddHHmmss", "\\b\\d{14}\\b", + "\\b20\\d{2}%{MONTHNUM2}(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01]))(?:2[0123]|[01][0-9])%{MINUTE}(?:[0-5][0-9]|60)\\b", + "DATESTAMP_EVENTLOG"), + new CandidateTimestampFormat("EEE MMM dd HH:mm:ss YYYY", + "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{2}:\\d{2}:\\d{2} \\d{4}\\b", + "\\b%{DAY} %{MONTH} %{MONTHDAY} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) %{YEAR}\\b", "HTTPDERROR_DATE", Arrays.asList(1, 2)), + new CandidateTimestampFormat(Arrays.asList("MMM dd HH:mm:ss,SSS", "MMM d HH:mm:ss,SSS"), + "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2},\\d{3}", + "%{MONTH} +%{MONTHDAY} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60)[:.,][0-9]{3,9}\\b", "SYSLOGTIMESTAMP", + Collections.singletonList(1)), + new CandidateTimestampFormat(Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"), + "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "%{MONTH} +%{MONTHDAY} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60)\\b", + "SYSLOGTIMESTAMP", Collections.singletonList(1)), + new CandidateTimestampFormat("dd/MMM/YYYY:HH:mm:ss Z", "\\b\\d{2}/[A-Z]\\S{2}/\\d{4}:\\d{2}:\\d{2}:\\d{2} ", + "\\b%{MONTHDAY}/%{MONTH}/%{YEAR}:%{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) [+-]?%{HOUR}%{MINUTE}\\b", "HTTPDATE"), + new CandidateTimestampFormat("MMM dd, YYYY K:mm:ss a", "\\b[A-Z]\\S{2,8} \\d{1,2}, \\d{4} \\d{1,2}:\\d{2}:\\d{2} [AP]M\\b", + "%{MONTH} %{MONTHDAY}, 20\\d{2} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60) (?:AM|PM)\\b", "CATALINA_DATESTAMP"), + new CandidateTimestampFormat(Arrays.asList("MMM dd YYYY HH:mm:ss", "MMM d YYYY HH:mm:ss"), + "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{4} \\d{2}:\\d{2}:\\d{2}\\b", + "%{MONTH} +%{MONTHDAY} %{YEAR} %{HOUR}:%{MINUTE}:(?:[0-5][0-9]|60)\\b", "CISCOTIMESTAMP", Collections.singletonList(1)), + new CandidateTimestampFormat("UNIX_MS", "\\b\\d{13}\\b", "\\b\\d{13}\\b", "POSINT"), + new CandidateTimestampFormat("UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "\\b\\d{10}\\.(?:\\d{3}){1,3}\\b", 
"NUMBER"), + new CandidateTimestampFormat("UNIX", "\\b\\d{10}\\b", "\\b\\d{10}\\b", "POSINT"), + new CandidateTimestampFormat("TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM") + ); + + private TimestampFormatFinder() { + } + + /** + * Find the first timestamp format that matches part of the supplied value. + * @param text The value that the returned timestamp format must exist within. + * @return The timestamp format, or null if none matches. + */ + public static TimestampMatch findFirstMatch(String text) { + return findFirstMatch(text, 0); + } + + /** + * Find the first timestamp format that matches part of the supplied value, + * excluding a specified number of candidate formats. + * @param text The value that the returned timestamp format must exist within. + * @param ignoreCandidates The number of candidate formats to exclude from the search. + * @return The timestamp format, or null if none matches. + */ + public static TimestampMatch findFirstMatch(String text, int ignoreCandidates) { + Boolean[] quickRuleoutMatches = new Boolean[QUICK_RULE_OUT_PATTERNS.size()]; + int index = ignoreCandidates; + for (CandidateTimestampFormat candidate : ORDERED_CANDIDATE_FORMATS.subList(ignoreCandidates, ORDERED_CANDIDATE_FORMATS.size())) { + boolean quicklyRuledOut = false; + for (Integer quickRuleOutIndex : candidate.quickRuleOutIndices) { + if (quickRuleoutMatches[quickRuleOutIndex] == null) { + quickRuleoutMatches[quickRuleOutIndex] = QUICK_RULE_OUT_PATTERNS.get(quickRuleOutIndex).matcher(text).find(); + } + if (quickRuleoutMatches[quickRuleOutIndex] == false) { + quicklyRuledOut = true; + break; + } + } + if (quicklyRuledOut == false) { + Map captures = candidate.strictSearchGrok.captures(text); + if (captures != null) { + String preface = captures.getOrDefault(PREFACE, "").toString(); + String epilogue = captures.getOrDefault(EPILOGUE, "").toString(); + return makeTimestampMatch(candidate, index, preface, text.substring(preface.length(), + text.length() - epilogue.length()), epilogue); + } + } + ++index; + } + return null; + } + + /** + * Find the best timestamp format for matching an entire field value. + * @param text The value that the returned timestamp format must match in its entirety. + * @return The timestamp format, or null if none matches. + */ + public static TimestampMatch findFirstFullMatch(String text) { + return findFirstFullMatch(text, 0); + } + + /** + * Find the best timestamp format for matching an entire field value, + * excluding a specified number of candidate formats. + * @param text The value that the returned timestamp format must match in its entirety. + * @param ignoreCandidates The number of candidate formats to exclude from the search. + * @return The timestamp format, or null if none matches. 
+ */ + public static TimestampMatch findFirstFullMatch(String text, int ignoreCandidates) { + int index = ignoreCandidates; + for (CandidateTimestampFormat candidate : ORDERED_CANDIDATE_FORMATS.subList(ignoreCandidates, ORDERED_CANDIDATE_FORMATS.size())) { + Map captures = candidate.strictFullMatchGrok.captures(text); + if (captures != null) { + return makeTimestampMatch(candidate, index, "", text, ""); + } + ++index; + } + return null; + } + + private static TimestampMatch makeTimestampMatch(CandidateTimestampFormat chosenTimestampFormat, int chosenIndex, + String preface, String matchedDate, String epilogue) { + Tuple fractionalSecondsInterpretation = interpretFractionalSeconds(matchedDate); + List dateFormats = chosenTimestampFormat.dateFormats; + Pattern simplePattern = chosenTimestampFormat.simplePattern; + char separator = fractionalSecondsInterpretation.v1(); + if (separator != DEFAULT_FRACTIONAL_SECOND_SEPARATOR) { + dateFormats = dateFormats.stream().map(dateFormat -> dateFormat.replace(DEFAULT_FRACTIONAL_SECOND_SEPARATOR, separator)) + .collect(Collectors.toList()); + if (dateFormats.stream().noneMatch(dateFormat -> dateFormat.startsWith("UNIX"))) { + String patternStr = simplePattern.pattern(); + int separatorPos = patternStr.lastIndexOf(DEFAULT_FRACTIONAL_SECOND_SEPARATOR); + if (separatorPos >= 0) { + StringBuilder newPatternStr = new StringBuilder(patternStr); + newPatternStr.replace(separatorPos, separatorPos + 1, ((separator == '.') ? "\\" : "") + separator); + simplePattern = Pattern.compile(newPatternStr.toString()); + } + } + } + int numberOfDigitsInFractionalComponent = fractionalSecondsInterpretation.v2(); + if (numberOfDigitsInFractionalComponent > 3) { + String fractionalSecondsFormat = "SSSSSSSSS".substring(0, numberOfDigitsInFractionalComponent); + dateFormats = dateFormats.stream().map(dateFormat -> dateFormat.replace("SSS", fractionalSecondsFormat)) + .collect(Collectors.toList()); + } + return new TimestampMatch(chosenIndex, preface, dateFormats, simplePattern, chosenTimestampFormat.standardGrokPatternName, + epilogue); + } + + /** + * Interpret the fractional seconds component of a date to determine two things: + * 1. The separator character - one of colon, comma and dot. + * 2. The number of digits in the fractional component. + * @param date The textual representation of the date for which fractional seconds are to be interpreted. + * @return A tuple of (fractional second separator character, number of digits in fractional component). + */ + static Tuple interpretFractionalSeconds(String date) { + + Matcher matcher = FRACTIONAL_SECOND_INTERPRETER.matcher(date); + if (matcher.find()) { + return new Tuple<>(matcher.group(1).charAt(0), matcher.group(2).length()); + } + + return new Tuple<>(DEFAULT_FRACTIONAL_SECOND_SEPARATOR, 0); + } + + /** + * Represents a timestamp that has matched a field value or been found within a message. + */ + public static final class TimestampMatch { + + /** + * The index of the corresponding entry in the ORDERED_CANDIDATE_FORMATS list. + */ + public final int candidateIndex; + + /** + * Text that came before the timestamp in the matched field/message. + */ + public final String preface; + + /** + * Time format specifier(s) that will work with Logstash and Ingest pipeline date parsers. + */ + public final List dateFormats; + + /** + * A simple regex that will work in many languages to detect whether the timestamp format + * exists in a particular line. 
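+         * For example, the ISO8601 candidate's simple pattern \b\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}
+         * cheaply detects lines containing timestamps such as 2018-05-15T16:14:56.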
+ */ + public final Pattern simplePattern; + + /** + * Name of an out-of-the-box Grok pattern that will match the timestamp. + */ + public final String grokPatternName; + + /** + * Text that came after the timestamp in the matched field/message. + */ + public final String epilogue; + + TimestampMatch(int candidateIndex, String preface, String dateFormat, String simpleRegex, String grokPatternName, String epilogue) { + this(candidateIndex, preface, Collections.singletonList(dateFormat), simpleRegex, grokPatternName, epilogue); + } + + TimestampMatch(int candidateIndex, String preface, String dateFormat, String simpleRegex, String grokPatternName, String epilogue, + boolean hasFractionalComponentSmallerThanMillisecond) { + this(candidateIndex, preface, Collections.singletonList(dateFormat), simpleRegex, grokPatternName, epilogue); + } + + TimestampMatch(int candidateIndex, String preface, List<String> dateFormats, String simpleRegex, String grokPatternName, + String epilogue) { + this(candidateIndex, preface, dateFormats, Pattern.compile(simpleRegex), grokPatternName, epilogue); + } + + TimestampMatch(int candidateIndex, String preface, List<String> dateFormats, Pattern simplePattern, String grokPatternName, + String epilogue) { + this.candidateIndex = candidateIndex; + this.preface = preface; + this.dateFormats = dateFormats; + this.simplePattern = simplePattern; + this.grokPatternName = grokPatternName; + this.epilogue = epilogue; + } + + /** + * Does parsing the timestamp produce different results depending on the timezone of the parser? + * I.e., does the textual representation NOT define the timezone? + */ + public boolean hasTimezoneDependentParsing() { + return dateFormats.stream() + .anyMatch(dateFormat -> dateFormat.contains("HH") && dateFormat.toLowerCase(Locale.ROOT).indexOf('z') == -1); + } + + /** + * Sometimes Elasticsearch mappings for dates need to include the format. + * This method returns appropriate mappings settings: at minimum "type"="date", + * and possibly also a "format" setting.
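+ * For example, following the conversions in the method body: a match whose format is {@code UNIX_MS} maps to {@code {"type":"date","format":"epoch_millis"}}, while a plain {@code ISO8601} match contributes no explicit "format" entry and so maps to just {@code {"type":"date"}}.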
+ */ + public Map<String, String> getEsDateMappingTypeWithFormat() { + if (dateFormats.contains("TAI64N")) { + // There's no format for TAI64N in the date formats used in mappings + return Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"); + } + Map<String, String> mapping = new LinkedHashMap<>(); + mapping.put(LogStructureUtils.MAPPING_TYPE_SETTING, "date"); + String formats = dateFormats.stream().flatMap(format -> { + switch (format) { + case "ISO8601": + return Stream.empty(); + case "UNIX_MS": + return Stream.of("epoch_millis"); + case "UNIX": + return Stream.of("epoch_second"); + default: + return Stream.of(format); + } + }).collect(Collectors.joining("||")); + if (formats.isEmpty() == false) { + mapping.put(LogStructureUtils.MAPPING_FORMAT_SETTING, formats); + } + return mapping; + } + + @Override + public int hashCode() { + return Objects.hash(candidateIndex, preface, dateFormats, simplePattern.pattern(), grokPatternName, epilogue); + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + + TimestampMatch that = (TimestampMatch) other; + return this.candidateIndex == that.candidateIndex && + Objects.equals(this.preface, that.preface) && + Objects.equals(this.dateFormats, that.dateFormats) && + Objects.equals(this.simplePattern.pattern(), that.simplePattern.pattern()) && + Objects.equals(this.grokPatternName, that.grokPatternName) && + Objects.equals(this.epilogue, that.epilogue); + } + + @Override + public String toString() { + return "index = " + candidateIndex + (preface.isEmpty() ? "" : ", preface = '" + preface + "'") + + ", date formats = " + dateFormats.stream().collect(Collectors.joining("', '", "[ '", "' ]")) + + ", simple pattern = '" + simplePattern.pattern() + "', grok pattern = '" + grokPatternName + "'" + + (epilogue.isEmpty() ?
"" : ", epilogue = '" + epilogue + "'"); + } + } + + static final class CandidateTimestampFormat { + + final List dateFormats; + final Pattern simplePattern; + final Grok strictSearchGrok; + final Grok strictFullMatchGrok; + final String standardGrokPatternName; + final List quickRuleOutIndices; + + CandidateTimestampFormat(String dateFormat, String simpleRegex, String strictGrokPattern, String standardGrokPatternName) { + this(Collections.singletonList(dateFormat), simpleRegex, strictGrokPattern, standardGrokPatternName); + } + + CandidateTimestampFormat(String dateFormat, String simpleRegex, String strictGrokPattern, String standardGrokPatternName, + List quickRuleOutIndices) { + this(Collections.singletonList(dateFormat), simpleRegex, strictGrokPattern, standardGrokPatternName, quickRuleOutIndices); + } + + CandidateTimestampFormat(List dateFormats, String simpleRegex, String strictGrokPattern, String standardGrokPatternName) { + this(dateFormats, simpleRegex, strictGrokPattern, standardGrokPatternName, Collections.emptyList()); + } + + CandidateTimestampFormat(List dateFormats, String simpleRegex, String strictGrokPattern, String standardGrokPatternName, + List quickRuleOutIndices) { + this.dateFormats = dateFormats; + this.simplePattern = Pattern.compile(simpleRegex, Pattern.MULTILINE); + // The (?m) here has the Ruby meaning, which is equivalent to (?s) in Java + this.strictSearchGrok = new Grok(Grok.getBuiltinPatterns(), "(?m)%{DATA:" + PREFACE + "}" + strictGrokPattern + + "%{GREEDYDATA:" + EPILOGUE + "}"); + this.strictFullMatchGrok = new Grok(Grok.getBuiltinPatterns(), strictGrokPattern); + this.standardGrokPatternName = standardGrokPatternName; + assert quickRuleOutIndices.stream() + .noneMatch(quickRuleOutIndex -> quickRuleOutIndex < 0 || quickRuleOutIndex >= QUICK_RULE_OUT_PATTERNS.size()); + this.quickRuleOutIndices = quickRuleOutIndices; + } + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TsvLogStructureFinderFactory.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TsvLogStructureFinderFactory.java new file mode 100644 index 0000000000000..733b32346fbed --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TsvLogStructureFinderFactory.java @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.supercsv.prefs.CsvPreference; + +import java.io.IOException; +import java.util.List; + +public class TsvLogStructureFinderFactory implements LogStructureFinderFactory { + + /** + * Rules are: + * - The file must be valid TSV + * - It must contain at least two complete records + * - There must be at least two fields per record (otherwise files with no tabs could be treated as TSV!) + * - Every TSV record except the last must have the same number of fields + * The reason the last record is allowed to have fewer fields than the others is that + * it could have been truncated when the file was sampled. 
+ */ + @Override + public boolean canCreateFromSample(List<String> explanation, String sample) { + return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.TAB_PREFERENCE, "TSV"); + } + + @Override + public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker) + throws IOException { + return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, + CsvPreference.TAB_PREFERENCE, false); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinder.java b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinder.java new file mode 100644 index 0000000000000..d664a9ccb8213 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinder.java @@ -0,0 +1,172 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.xpack.ml.logstructurefinder.TimestampFormatFinder.TimestampMatch; +import org.w3c.dom.Document; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Scanner; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.regex.Pattern; + +public class XmlLogStructureFinder implements LogStructureFinder { + + private final List<String> sampleMessages; + private final LogStructure structure; + + static XmlLogStructureFinder makeXmlLogStructureFinder(List<String> explanation, String sample, String charsetName, + Boolean hasByteOrderMarker) + throws IOException, ParserConfigurationException, SAXException { + + String messagePrefix; + try (Scanner scanner = new Scanner(sample)) { + messagePrefix = scanner.next(); + } + + DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); + docBuilderFactory.setNamespaceAware(false); + docBuilderFactory.setValidating(false); + + List<String> sampleMessages = new ArrayList<>(); + List<Map<String, ?>> sampleRecords = new ArrayList<>(); + + String[] sampleDocEnds = sample.split(Pattern.quote(messagePrefix)); + StringBuilder preamble = new StringBuilder(sampleDocEnds[0]); + int linesConsumed = numNewlinesIn(sampleDocEnds[0]); + for (int i = 1; i < sampleDocEnds.length; ++i) { + String sampleDoc = messagePrefix + sampleDocEnds[i]; + if (i < 3) { + preamble.append(sampleDoc); + } + DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder(); + try (InputStream is = new ByteArrayInputStream(sampleDoc.getBytes(StandardCharsets.UTF_8))) { + sampleRecords.add(docToMap(docBuilder.parse(is))); + sampleMessages.add(sampleDoc); + linesConsumed +=
numNewlinesIn(sampleDoc); + } catch (SAXException e) { + // Tolerate an incomplete last record as long as we have one complete record + if (sampleRecords.isEmpty() || i < sampleDocEnds.length - 1) { + throw e; + } + } + } + + if (sample.endsWith("\n") == false) { + ++linesConsumed; + } + + // If we get here the XML parser should have confirmed this + assert messagePrefix.charAt(0) == '<'; + String topLevelTag = messagePrefix.substring(1); + + LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.XML) + .setCharset(charsetName) + .setHasByteOrderMarker(hasByteOrderMarker) + .setSampleStart(preamble.toString()) + .setNumLinesAnalyzed(linesConsumed) + .setNumMessagesAnalyzed(sampleRecords.size()) + .setMultilineStartPattern("^\\s*<" + topLevelTag); + + Tuple<String, TimestampMatch> timeField = LogStructureUtils.guessTimestampField(explanation, sampleRecords); + if (timeField != null) { + structureBuilder.setTimestampField(timeField.v1()) + .setTimestampFormats(timeField.v2().dateFormats) + .setNeedClientTimezone(timeField.v2().hasTimezoneDependentParsing()); + } + + SortedMap<String, Object> innerMappings = LogStructureUtils.guessMappings(explanation, sampleRecords); + Map<String, Object> secondLevelProperties = new LinkedHashMap<>(); + secondLevelProperties.put(LogStructureUtils.MAPPING_TYPE_SETTING, "object"); + secondLevelProperties.put(LogStructureUtils.MAPPING_PROPERTIES_SETTING, innerMappings); + SortedMap<String, Object> outerMappings = new TreeMap<>(); + outerMappings.put(topLevelTag, secondLevelProperties); + outerMappings.put(LogStructureUtils.DEFAULT_TIMESTAMP_FIELD, + Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "date")); + + LogStructure structure = structureBuilder + .setMappings(outerMappings) + .setExplanation(explanation) + .build(); + + return new XmlLogStructureFinder(sampleMessages, structure); + } + + private XmlLogStructureFinder(List<String> sampleMessages, LogStructure structure) { + this.sampleMessages = Collections.unmodifiableList(sampleMessages); + this.structure = structure; + } + + @Override + public List<String> getSampleMessages() { + return sampleMessages; + } + + @Override + public LogStructure getStructure() { + return structure; + } + + private static int numNewlinesIn(String str) { + return (int) str.chars().filter(c -> c == '\n').count(); + } + + private static Map<String, Object> docToMap(Document doc) { + + Map<String, Object> docAsMap = new LinkedHashMap<>(); + + doc.getDocumentElement().normalize(); + addNodeToMap(doc.getDocumentElement(), docAsMap); + + return docAsMap; + } + + private static void addNodeToMap(Node node, Map<String, Object> nodeAsMap) { + + NamedNodeMap attributes = node.getAttributes(); + for (int i = 0; i < attributes.getLength(); ++i) { + Node attribute = attributes.item(i); + nodeAsMap.put(attribute.getNodeName(), attribute.getNodeValue()); + } + + NodeList children = node.getChildNodes(); + for (int i = 0; i < children.getLength(); ++i) { + Node child = children.item(i); + if (child.getNodeType() == Node.ELEMENT_NODE) { + if (child.getChildNodes().getLength() == 1) { + Node grandChild = child.getChildNodes().item(0); + String value = grandChild.getNodeValue().trim(); + if (value.isEmpty() == false) { + nodeAsMap.put(child.getNodeName(), value); + } + } else { + Map<String, Object> childNodeAsMap = new LinkedHashMap<>(); + addNodeToMap(child, childNodeAsMap); + if (childNodeAsMap.isEmpty() == false) { + nodeAsMap.put(child.getNodeName(), childNodeAsMap); + } + } + } + } + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderFactory.java
b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderFactory.java new file mode 100644 index 0000000000000..c7577ff07de6d --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderFactory.java @@ -0,0 +1,122 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.stream.Location; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; +import java.util.List; + +public class XmlLogStructureFinderFactory implements LogStructureFinderFactory { + + private final XMLInputFactory xmlFactory; + + public XmlLogStructureFinderFactory() { + xmlFactory = XMLInputFactory.newInstance(); + xmlFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.FALSE); + xmlFactory.setProperty(XMLInputFactory.IS_VALIDATING, Boolean.FALSE); + } + + /** + * This format matches if the sample consists of one or more XML documents, + * all with the same root element name. If there is more than one document, + * only whitespace is allowed in between them. The last one does not + * necessarily have to be complete (as the sample could have truncated it). + */ + @Override + public boolean canCreateFromSample(List<String> explanation, String sample) { + + int completeDocCount = 0; + String commonRootElementName = null; + String remainder = sample.trim(); + boolean mightBeAnotherDocument = !remainder.isEmpty(); + + // This processing is extremely complicated because it's necessary + // to create a new XML stream reader per document, but each one + // will read ahead so will potentially consume characters from the + // following document. We must therefore also recreate the string + // reader for each document. + while (mightBeAnotherDocument) { + + try (Reader reader = new StringReader(remainder)) { + + XMLStreamReader xmlReader = xmlFactory.createXMLStreamReader(reader); + try { + int nestingLevel = 0; + while ((mightBeAnotherDocument = xmlReader.hasNext())) { + switch (xmlReader.next()) { + case XMLStreamReader.START_ELEMENT: + if (nestingLevel++ == 0) { + String rootElementName = xmlReader.getLocalName(); + if (commonRootElementName == null) { + commonRootElementName = rootElementName; + } else if (commonRootElementName.equals(rootElementName) == false) { + explanation.add("Not XML because different documents have different root " + + "element names: [" + commonRootElementName + "] and [" + rootElementName + "]"); + return false; + } + } + break; + case XMLStreamReader.END_ELEMENT: + if (--nestingLevel < 0) { + explanation.add("Not XML because an end element occurs before a start element"); + return false; + } + break; + } + if (nestingLevel == 0) { + ++completeDocCount; + // Find the position that's one character beyond end of the end element. + // The next document (if there is one) must start after this (possibly + // preceded by whitespace).
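+ // For example, if the reader reports line 3, column 6 then the loop below skips past two newline-terminated lines and the subsequent adjustment adds the remaining five characters of the final partial line.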
+ Location location = xmlReader.getLocation(); + int endPos = 0; + // Line and column numbers start at 1, not 0 + for (int wholeLines = location.getLineNumber() - 1; wholeLines > 0; --wholeLines) { + endPos = remainder.indexOf('\n', endPos) + 1; + if (endPos == 0) { + explanation.add("Not XML because XML parser location is inconsistent: line [" + + location.getLineNumber() + "], column [" + location.getColumnNumber() + "] in [" + remainder + "]"); + return false; + } + } + endPos += location.getColumnNumber() - 1; + remainder = remainder.substring(endPos).trim(); + mightBeAnotherDocument = !remainder.isEmpty(); + break; + } + } + } finally { + xmlReader.close(); + } + } catch (IOException | XMLStreamException e) { + explanation.add("Not XML because there was a parsing exception: [" + e.getMessage().replaceAll("\\s?\r?\n\\s?", " ") + "]"); + return false; + } + } + + if (completeDocCount == 0) { + explanation.add("Not XML because sample didn't contain a complete document"); + return false; + } + + explanation.add("Deciding sample is XML"); + return true; + } + + @Override + public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker) + throws IOException, ParserConfigurationException, SAXException { + return XmlLogStructureFinder.makeXmlLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/CsvLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/CsvLogStructureFinderFactoryTests.java new file mode 100644 index 0000000000000..f53ee008d691e --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/CsvLogStructureFinderFactoryTests.java @@ -0,0 +1,38 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License.
+ */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +public class CsvLogStructureFinderFactoryTests extends LogStructureTestCase { + + private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory(); + + // No need to check JSON or XML because they come earlier in the order we check formats + + public void testCanCreateFromSampleGivenCsv() { + + assertTrue(factory.canCreateFromSample(explanation, CSV_SAMPLE)); + } + + public void testCanCreateFromSampleGivenTsv() { + + assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE)); + } + + public void testCanCreateFromSampleGivenSemiColonSeparatedValues() { + + assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE)); + } + + public void testCanCreateFromSampleGivenPipeSeparatedValues() { + + assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE)); + } + + public void testCanCreateFromSampleGivenText() { + + assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE)); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreatorTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreatorTests.java new file mode 100644 index 0000000000000..87f9f662698ef --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreatorTests.java @@ -0,0 +1,326 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.xpack.ml.logstructurefinder.GrokPatternCreator.ValueOnlyGrokPatternCandidate; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import static org.hamcrest.Matchers.containsInAnyOrder; + +public class GrokPatternCreatorTests extends LogStructureTestCase { + + public void testBuildFieldName() { + Map<String, Integer> fieldNameCountStore = new HashMap<>(); + assertEquals("field", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field")); + assertEquals("field2", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field")); + assertEquals("field3", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field")); + assertEquals("extra_timestamp", GrokPatternCreator.buildFieldName(fieldNameCountStore, "extra_timestamp")); + assertEquals("field4", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field")); + assertEquals("uri", GrokPatternCreator.buildFieldName(fieldNameCountStore, "uri")); + assertEquals("extra_timestamp2", GrokPatternCreator.buildFieldName(fieldNameCountStore, "extra_timestamp")); + assertEquals("field5", GrokPatternCreator.buildFieldName(fieldNameCountStore, "field")); + } + + public void testPopulatePrefacesAndEpiloguesGivenTimestamp() { + + Collection<String> matchingStrings = Arrays.asList("[2018-01-25T15:33:23] DEBUG ", + "[2018-01-24T12:33:23] ERROR ", + "junk [2018-01-22T07:33:23] INFO ", + "[2018-01-21T03:33:23] DEBUG "); + ValueOnlyGrokPatternCandidate candidate = new ValueOnlyGrokPatternCandidate("TIMESTAMP_ISO8601", "date", "extra_timestamp"); + + Map<String, Integer> fieldNameCountStore = new HashMap<>(); + Collection<String> prefaces = new ArrayList<>(); + Collection<String> epilogues = new ArrayList<>(); + + candidate.processCaptures(fieldNameCountStore, matchingStrings, prefaces, epilogues, null); + + assertThat(prefaces, containsInAnyOrder("[", "[", "junk [", "[")); + assertThat(epilogues, containsInAnyOrder("] DEBUG ", "] ERROR ", "] INFO ", "] DEBUG ")); + } + + public void testPopulatePrefacesAndEpiloguesGivenEmailAddress() { + + Collection<String> matchingStrings = Arrays.asList("before alice@acme.com after", + "abc bob@acme.com xyz", + "carol@acme.com"); + ValueOnlyGrokPatternCandidate candidate = new ValueOnlyGrokPatternCandidate("EMAILADDRESS", "keyword", "email"); + + Map<String, Integer> fieldNameCountStore = new HashMap<>(); + Collection<String> prefaces = new ArrayList<>(); + Collection<String> epilogues = new ArrayList<>(); + + candidate.processCaptures(fieldNameCountStore, matchingStrings, prefaces, epilogues, null); + + assertThat(prefaces, containsInAnyOrder("before ", "abc ", "")); + assertThat(epilogues, containsInAnyOrder(" after", " xyz", "")); + } + + public void testAppendBestGrokMatchForStringsGivenTimestampsAndLogLevels() { + + Collection<String> snippets = Arrays.asList("[2018-01-25T15:33:23] DEBUG ", + "[2018-01-24T12:33:23] ERROR ", + "junk [2018-01-22T07:33:23] INFO ", + "[2018-01-21T03:33:23] DEBUG "); + + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + grokPatternCreator.appendBestGrokMatchForStrings(false, snippets, false, 0); + + assertEquals(".*?\\[%{TIMESTAMP_ISO8601:extra_timestamp}\\] %{LOGLEVEL:loglevel} ", + grokPatternCreator.getOverallGrokPatternBuilder().toString()); + } + + public void testAppendBestGrokMatchForStringsGivenNumbersInBrackets() { + + Collection<String> snippets = Arrays.asList("(-2)", + " (-3)", + " (4)", + " (-5) "); + + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + grokPatternCreator.appendBestGrokMatchForStrings(false, snippets, false, 0); + + assertEquals(".*?\\(%{INT:field}\\).*?", grokPatternCreator.getOverallGrokPatternBuilder().toString()); + } + + public void testAppendBestGrokMatchForStringsGivenNegativeNumbersWithoutBreak() { + + Collection<String> snippets = Arrays.asList("before-2 ", + "prior to-3", + "-4"); + + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + grokPatternCreator.appendBestGrokMatchForStrings(false, snippets, false, 0); + + // It seems sensible that we don't detect these suffixes as either base 10 or base 16 numbers + assertEquals(".*?", grokPatternCreator.getOverallGrokPatternBuilder().toString()); + } + + public void testAppendBestGrokMatchForStringsGivenHexNumbers() { + + Collection<String> snippets = Arrays.asList(" abc", + " 123", + " -123", + "1f is hex"); + + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + grokPatternCreator.appendBestGrokMatchForStrings(false, snippets, false, 0); + + assertEquals(".*?%{BASE16NUM:field}.*?", grokPatternCreator.getOverallGrokPatternBuilder().toString()); + } + + public void testAppendBestGrokMatchForStringsGivenHostnamesWithNumbers() { + + Collection<String> snippets = Arrays.asList("<host1.1.p2ps:", + "<host2.1.p2ps:"); + + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + grokPatternCreator.appendBestGrokMatchForStrings(false, snippets, false, 0); + + // We don't want the numbers in the hostnames to be detected as numeric fields + assertEquals(".*?", grokPatternCreator.getOverallGrokPatternBuilder().toString()); + } + + public void testAppendBestGrokMatchForStringsGivenEmailAddresses() { + + Collection<String> snippets = Arrays.asList("before alice@acme.com after", + "abc bob@acme.com xyz", + "carol@acme.com"); + + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + grokPatternCreator.appendBestGrokMatchForStrings(false, snippets, false, 0); + + assertEquals(".*?%{EMAILADDRESS:email}.*?",
grokPatternCreator.getOverallGrokPatternBuilder().toString()); + } + + public void testAppendBestGrokMatchForStringsGivenUris() { + + Collection<String> snippets = Arrays.asList("main site https://www.elastic.co/ with trailing slash", + "https://www.elastic.co/guide/en/x-pack/current/ml-configuring-categories.html#ml-configuring-categories is a section", + "download today from https://www.elastic.co/downloads"); + + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + grokPatternCreator.appendBestGrokMatchForStrings(false, snippets, false, 0); + + assertEquals(".*?%{URI:uri}.*?", grokPatternCreator.getOverallGrokPatternBuilder().toString()); + } + + public void testAppendBestGrokMatchForStringsGivenPaths() { + + Collection<String> snippets = Arrays.asList("on Mac /Users/dave", + "on Windows C:\\Users\\dave", + "on Linux /home/dave"); + + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + grokPatternCreator.appendBestGrokMatchForStrings(false, snippets, false, 0); + + assertEquals(".*? .*? %{PATH:path}", grokPatternCreator.getOverallGrokPatternBuilder().toString()); + } + + public void testAppendBestGrokMatchForStringsGivenKvPairs() { + + Collection<String> snippets = Arrays.asList("foo=1 and bar=a", + "something foo=2 bar=b something else", + "foo=3 bar=c", + " foo=1 bar=a "); + + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + grokPatternCreator.appendBestGrokMatchForStrings(false, snippets, false, 0); + + assertEquals(".*?\\bfoo=%{USER:foo} .*?\\bbar=%{USER:bar}.*?", grokPatternCreator.getOverallGrokPatternBuilder().toString()); + } + + public void testCreateGrokPatternFromExamplesGivenNamedLogs() { + + Collection<String> sampleMessages = Arrays.asList( + "Sep 8 11:55:06 linux named[22529]: error (unexpected RCODE REFUSED) resolving 'elastic.slack.com/A/IN': 95.110.64.205#53", + "Sep 8 11:55:08 linux named[22529]: error (unexpected RCODE REFUSED) resolving 'slack-imgs.com/A/IN': 95.110.64.205#53", + "Sep 8 11:55:35 linux named[22529]: error (unexpected RCODE REFUSED) resolving 'www.elastic.co/A/IN': 95.110.68.206#53", + "Sep 8 11:55:42 linux named[22529]: error (unexpected RCODE REFUSED) resolving 'b.akamaiedge.net/A/IN': 95.110.64.205#53"); + + Map<String, Object> mappings = new HashMap<>(); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings); + + assertEquals("%{SYSLOGTIMESTAMP:timestamp} .*? .*?\\[%{INT:field}\\]: %{LOGLEVEL:loglevel} \\(.*? .*? .*?\\) .*?
" + + "%{QUOTEDSTRING:field2}: %{IP:ipaddress}#%{INT:field3}", + grokPatternCreator.createGrokPatternFromExamples("SYSLOGTIMESTAMP", "timestamp")); + assertEquals(5, mappings.size()); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("field")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("loglevel")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("field2")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "ip"), mappings.get("ipaddress")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("field3")); + } + + public void testCreateGrokPatternFromExamplesGivenCatalinaLogs() { + + Collection sampleMessages = Arrays.asList( + "Aug 29, 2009 12:03:33 AM org.apache.tomcat.util.http.Parameters processParameters\nWARNING: Parameters: " + + "Invalid chunk ignored.", + "Aug 29, 2009 12:03:40 AM org.apache.tomcat.util.http.Parameters processParameters\nWARNING: Parameters: " + + "Invalid chunk ignored.", + "Aug 29, 2009 12:03:45 AM org.apache.tomcat.util.http.Parameters processParameters\nWARNING: Parameters: " + + "Invalid chunk ignored.", + "Aug 29, 2009 12:03:57 AM org.apache.tomcat.util.http.Parameters processParameters\nWARNING: Parameters: " + + "Invalid chunk ignored."); + + Map mappings = new HashMap<>(); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings); + + assertEquals("%{CATALINA_DATESTAMP:timestamp} .*? .*?\\n%{LOGLEVEL:loglevel}: .*", + grokPatternCreator.createGrokPatternFromExamples("CATALINA_DATESTAMP", "timestamp")); + assertEquals(1, mappings.size()); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("loglevel")); + } + + public void testCreateGrokPatternFromExamplesGivenMultiTimestampLogs() { + + // Two timestamps: one local, one UTC + Collection sampleMessages = Arrays.asList( + "559550912540598297\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t192.168.114.28\tAuthpriv\t" + + "Info\tsshd\tsubsystem request for sftp", + "559550912548986880\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t9049724\tserv02nw03\t10.120.48.147\tAuthpriv\t" + + "Info\tsshd\tsubsystem request for sftp", + "559550912548986887\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t884343\tserv02tw03\t192.168.121.189\tAuthpriv\t" + + "Info\tsshd\tsubsystem request for sftp", + "559550912603512850\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t8907014\tserv02nw01\t192.168.118.208\tAuthpriv\t" + + "Info\tsshd\tsubsystem request for sftp"); + + Map mappings = new HashMap<>(); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings); + + assertEquals("%{INT:field}\\t%{TIMESTAMP_ISO8601:timestamp}\\t%{TIMESTAMP_ISO8601:extra_timestamp}\\t%{INT:field2}\\t.*?\\t" + + "%{IP:ipaddress}\\t.*?\\t%{LOGLEVEL:loglevel}\\t.*", + grokPatternCreator.createGrokPatternFromExamples("TIMESTAMP_ISO8601", "timestamp")); + assertEquals(5, mappings.size()); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("field")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "date"), + mappings.get("extra_timestamp")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("field2")); + 
assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "ip"), mappings.get("ipaddress")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("loglevel")); + } + + public void testFindFullLineGrokPatternGivenApacheCombinedLogs() { + Collection<String> sampleMessages = Arrays.asList( + "83.149.9.216 - - [19/Jan/2016:08:13:42 +0000] " + + "\"GET /presentations/logstash-monitorama-2013/images/kibana-search.png HTTP/1.1\" 200 203023 " + + "\"http://semicomplete.com/presentations/logstash-monitorama-2013/\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) " + + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36\"", + "83.149.9.216 - - [19/Jan/2016:08:13:44 +0000] " + + "\"GET /presentations/logstash-monitorama-2013/plugin/zoom-js/zoom.js HTTP/1.1\" 200 7697 " + + "\"http://semicomplete.com/presentations/logstash-monitorama-2013/\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) " + + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36\"", + "83.149.9.216 - - [19/Jan/2016:08:13:44 +0000] " + + "\"GET /presentations/logstash-monitorama-2013/plugin/highlight/highlight.js HTTP/1.1\" 200 26185 " + + "\"http://semicomplete.com/presentations/logstash-monitorama-2013/\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) " + + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36\"", + "83.149.9.216 - - [19/Jan/2016:08:13:42 +0000] " + + "\"GET /presentations/logstash-monitorama-2013/images/sad-medic.png HTTP/1.1\" 200 430406 " + + "\"http://semicomplete.com/presentations/logstash-monitorama-2013/\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) " + + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.77 Safari/537.36\""); + + Map<String, Object> mappings = new HashMap<>(); + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings); + + assertEquals(new Tuple<>("timestamp", "%{COMBINEDAPACHELOG}"), grokPatternCreator.findFullLineGrokPattern()); + assertEquals(10, mappings.size()); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "text"), mappings.get("agent")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("auth")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("bytes")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "ip"), mappings.get("clientip")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "double"), mappings.get("httpversion")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("ident")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("referrer")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("request")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("response")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("verb")); + } + + public void testAdjustForPunctuationGivenCommonPrefix() { + Collection<String> snippets = Arrays.asList( + "\",\"lab6.localhost\",\"Route Domain\",\"/Common/0\",\"No-lookup\",\"192.168.33.212\",\"No-lookup\",\"192.168.33.132\"," +
"\"80\",\"46721\",\"/Common/Subnet_33\",\"TCP\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"Staged\",\"/Common/policy1\"" + + ",\"rule1\",\"Accept\",\"\",\"\",\"\",\"0000000000000000\"", + "\",\"lab6.localhost\",\"Route Domain\",\"/Common/0\",\"No-lookup\",\"192.168.143.244\",\"No-lookup\",\"192.168.33.106\"," + + "\"55025\",\"162\",\"/Common/Subnet_33\",\"UDP\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"Staged\",\"/Common/policy1\"" + + ",\"rule1\",\"Accept\",\"\",\"\",\"\",\"0000000000000000\"", + "\",\"lab6.localhost\",\"Route Domain\",\"/Common/0\",\"No-lookup\",\"192.168.33.3\",\"No-lookup\",\"224.0.0.102\"," + + "\"3222\",\"3222\",\"/Common/Subnet_33\",\"UDP\",\"0\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"Staged\",\"/Common/policy1\"" + + ",\"rule1\",\"Accept\",\"\",\"\",\"\",\"0000000000000000\"" + ); + + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + Collection adjustedSnippets = grokPatternCreator.adjustForPunctuation(snippets); + + assertEquals("\",", grokPatternCreator.getOverallGrokPatternBuilder().toString()); + assertNotNull(adjustedSnippets); + assertThat(new ArrayList<>(adjustedSnippets), + containsInAnyOrder(snippets.stream().map(snippet -> snippet.substring(2)).toArray(String[]::new))); + } + + public void testAdjustForPunctuationGivenNoCommonPrefix() { + Collection snippets = Arrays.asList( + "|client (id:2) was removed from servergroup 'Normal'(id:7) by client 'User1'(id:2)", + "|servergroup 'GAME'(id:9) was added by 'User1'(id:2)", + "|permission 'i_group_auto_update_type'(id:146) with values (value:30, negated:0, skipchannel:0) " + + "was added by 'User1'(id:2) to servergroup 'GAME'(id:9)" + ); + + GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, snippets, null); + Collection adjustedSnippets = grokPatternCreator.adjustForPunctuation(snippets); + + assertEquals("", grokPatternCreator.getOverallGrokPatternBuilder().toString()); + assertSame(snippets, adjustedSnippets); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderFactoryTests.java new file mode 100644 index 0000000000000..39ef3b9eedbba --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderFactoryTests.java @@ -0,0 +1,46 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +public class JsonLogStructureFinderFactoryTests extends LogStructureTestCase { + + private LogStructureFinderFactory factory = new JsonLogStructureFinderFactory(); + + public void testCanCreateFromSampleGivenJson() { + + assertTrue(factory.canCreateFromSample(explanation, JSON_SAMPLE)); + } + + public void testCanCreateFromSampleGivenXml() { + + assertFalse(factory.canCreateFromSample(explanation, XML_SAMPLE)); + } + + public void testCanCreateFromSampleGivenCsv() { + + assertFalse(factory.canCreateFromSample(explanation, CSV_SAMPLE)); + } + + public void testCanCreateFromSampleGivenTsv() { + + assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE)); + } + + public void testCanCreateFromSampleGivenSemiColonSeparatedValues() { + + assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE)); + } + + public void testCanCreateFromSampleGivenPipeSeparatedValues() { + + assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE)); + } + + public void testCanCreateFromSampleGivenText() { + + assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE)); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderTests.java new file mode 100644 index 0000000000000..2f727747bbff3 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/JsonLogStructureFinderTests.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import java.util.Collections; + +public class JsonLogStructureFinderTests extends LogStructureTestCase { + + private LogStructureFinderFactory factory = new JsonLogStructureFinderFactory(); + + public void testCreateConfigsGivenGoodJson() throws Exception { + assertTrue(factory.canCreateFromSample(explanation, JSON_SAMPLE)); + + String charset = randomFrom(POSSIBLE_CHARSETS); + Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); + LogStructureFinder structureFinder = factory.createFromSample(explanation, JSON_SAMPLE, charset, hasByteOrderMarker); + + LogStructure structure = structureFinder.getStructure(); + + assertEquals(LogStructure.Format.JSON, structure.getFormat()); + assertEquals(charset, structure.getCharset()); + if (hasByteOrderMarker == null) { + assertNull(structure.getHasByteOrderMarker()); + } else { + assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker()); + } + assertNull(structure.getExcludeLinesPattern()); + assertNull(structure.getMultilineStartPattern()); + assertNull(structure.getSeparator()); + assertNull(structure.getHasHeaderRow()); + assertNull(structure.getShouldTrimFields()); + assertNull(structure.getGrokPattern()); + assertEquals("timestamp", structure.getTimestampField()); + assertEquals(Collections.singletonList("UNIX_MS"), structure.getTimestampFormats()); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManagerTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManagerTests.java new file mode 100644 index 0000000000000..1f8691de8cf65 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManagerTests.java @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import com.ibm.icu.text.CharsetMatch; + +import java.io.ByteArrayInputStream; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + +import static org.hamcrest.Matchers.startsWith; +import static org.hamcrest.core.IsInstanceOf.instanceOf; + +public class LogStructureFinderManagerTests extends LogStructureTestCase { + + private LogStructureFinderManager structureFinderManager = new LogStructureFinderManager(); + + public void testFindCharsetGivenCharacterWidths() throws Exception { + + for (Charset charset : Arrays.asList(StandardCharsets.UTF_8, StandardCharsets.UTF_16LE, StandardCharsets.UTF_16BE)) { + CharsetMatch charsetMatch = structureFinderManager.findCharset(explanation, + new ByteArrayInputStream(TEXT_SAMPLE.getBytes(charset))); + assertEquals(charset.name(), charsetMatch.getName()); + } + } + + public void testFindCharsetGivenBinary() throws Exception { + + // This input should never match a single byte character set. ICU4J will sometimes decide + // that it matches a double byte character set, hence the two assertion branches. 
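+ // The zero bytes sprinkled into the random data below are what should make the input look binary rather than like text in a single byte character set.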
+ int size = 1000; + byte[] binaryBytes = randomByteArrayOfLength(size); + for (int i = 0; i < 10; ++i) { + binaryBytes[randomIntBetween(0, size - 1)] = 0; + } + + try { + CharsetMatch charsetMatch = structureFinderManager.findCharset(explanation, new ByteArrayInputStream(binaryBytes)); + assertThat(charsetMatch.getName(), startsWith("UTF-16")); + } catch (IllegalArgumentException e) { + assertEquals("Could not determine a usable character encoding for the input - could it be binary data?", e.getMessage()); + } + } + + public void testMakeBestStructureGivenJson() throws Exception { + assertThat(structureFinderManager.makeBestStructureFinder(explanation, + "{ \"time\": \"2018-05-17T13:41:23\", \"message\": \"hello\" }", StandardCharsets.UTF_8.name(), randomBoolean()), + instanceOf(JsonLogStructureFinder.class)); + } + + public void testMakeBestStructureGivenXml() throws Exception { + assertThat(structureFinderManager.makeBestStructureFinder(explanation, + "<log time=\"2018-05-17T13:41:23\"><message>hello</message></log>", StandardCharsets.UTF_8.name(), randomBoolean()), + instanceOf(XmlLogStructureFinder.class)); + } + + public void testMakeBestStructureGivenCsv() throws Exception { + assertThat(structureFinderManager.makeBestStructureFinder(explanation, "time,message\n" + + "2018-05-17T13:41:23,hello\n", StandardCharsets.UTF_8.name(), randomBoolean()), + instanceOf(SeparatedValuesLogStructureFinder.class)); + } + + public void testMakeBestStructureGivenText() throws Exception { + assertThat(structureFinderManager.makeBestStructureFinder(explanation, "[2018-05-17T13:41:23] hello\n" + + "[2018-05-17T13:41:24] hello again\n", StandardCharsets.UTF_8.name(), randomBoolean()), + instanceOf(TextLogStructureFinder.class)); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTestCase.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTestCase.java new file mode 100644 index 0000000000000..5f9a87ef2a7f2 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTestCase.java @@ -0,0 +1,86 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License.
+ */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.test.ESTestCase; +import org.junit.After; +import org.junit.Before; + +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.stream.Collectors; + +public abstract class LogStructureTestCase extends ESTestCase { + + protected static final List<String> POSSIBLE_CHARSETS = Collections.unmodifiableList(Charset.availableCharsets().keySet().stream() + .filter(name -> LogStructureFinderManager.FILEBEAT_SUPPORTED_ENCODINGS.contains(name.toLowerCase(Locale.ROOT))) + .collect(Collectors.toList())); + + protected static final String CSV_SAMPLE = "time,id,value\n" + + "2018-05-17T16:23:40,key1,42.0\n" + + "2018-05-17T16:24:11,\"key with spaces\",42.0\n"; + + protected static final String JSON_SAMPLE = "{\"logger\":\"controller\",\"timestamp\":1478261151445,\"level\":\"INFO\"," + + "\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 1\",\"class\":\"ml\"," + + "\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n" + + "{\"logger\":\"controller\",\"timestamp\":1478261151445," + + "\"level\":\"INFO\",\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 2\",\"class\":\"ml\"," + + "\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n"; + + protected static final String PIPE_SEPARATED_VALUES_SAMPLE = "2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |" + + "listening on 0.0.0.0:9987, :::9987\n" + + "2018-01-06 17:19:44.465252|INFO |VirtualServer |1 |client " + + "'User1'(id:2) changed default admin channelgroup to 'Guest'(id:8)\n" + + "2018-01-06 17:21:25.764368|INFO |VirtualServer |1 |client " + + "'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel 'Default Channel'(id:1)"; + + protected static final String SEMI_COLON_SEPARATED_VALUES_SAMPLE = "\"pos_id\";\"trip_id\";\"latitude\";\"longitude\";\"altitude\";" + + "\"timestamp\"\n" + + "\"1\";\"3\";\"4703.7815\";\"1527.4713\";\"359.9\";\"2017-01-19 16:19:04.742113\"\n" + + "\"2\";\"3\";\"4703.7815\";\"1527.4714\";\"359.9\";\"2017-01-19 16:19:05.741890\"\n" + + "\"3\";\"3\";\"4703.7816\";\"1527.4716\";\"360.3\";\"2017-01-19 16:19:06.738842\""; + + protected static final String TEXT_SAMPLE = "[2018-05-11T17:07:29,461][INFO ][o.e.n.Node ] [node-0] initializing ...\n" + + "[2018-05-11T17:07:29,553][INFO ][o.e.e.NodeEnvironment ] [node-0] using [1] data paths, mounts [[/ (/dev/disk1)]], " + + "net usable_space [223.4gb], net total_space [464.7gb], types [hfs]\n" + + "[2018-05-11T17:07:29,553][INFO ][o.e.e.NodeEnvironment ] [node-0] heap size [3.9gb], " + + "compressed ordinary object pointers [true]\n" + + "[2018-05-11T17:07:29,556][INFO ][o.e.n.Node ] [node-0] node name [node-0], node ID [tJ9u8HcaTbWxRtnlfz1RQA]\n"; + + protected static final String TSV_SAMPLE = "time\tid\tvalue\n" + + "2018-05-17T16:23:40\tkey1\t42.0\n" + + "2018-05-17T16:24:11\t\"key with spaces\"\t42.0\n"; + + protected static final String XML_SAMPLE = "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n" + + "\n"; + + protected List<String> explanation; + + @Before + public void initExplanation() { + explanation = new ArrayList<>(); + } + + @After + public void printExplanation() { + Loggers.getLogger(getClass()).info("Explanation:\n" + String.join("\n", explanation)); + } + + protected Boolean randomHasByteOrderMarker(String charset) {
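+ // A byte order marker is only meaningful for Unicode charsets, so for any other charset the concept is not applicable and null is returned rather than a random true/false.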
return charset.toUpperCase(Locale.ROOT).startsWith("UTF") ? randomBoolean() : null; + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTests.java new file mode 100644 index 0000000000000..738928ed28a37 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureTests.java @@ -0,0 +1,83 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractXContentTestCase; + +import java.nio.charset.Charset; +import java.util.Arrays; +import java.util.Collections; +import java.util.EnumSet; +import java.util.Locale; +import java.util.Map; +import java.util.TreeMap; + +public class LogStructureTests extends AbstractXContentTestCase<LogStructure> { + + protected LogStructure createTestInstance() { + + LogStructure.Format format = randomFrom(EnumSet.allOf(LogStructure.Format.class)); + + LogStructure.Builder builder = new LogStructure.Builder(format); + + int numLinesAnalyzed = randomIntBetween(2, 10000); + builder.setNumLinesAnalyzed(numLinesAnalyzed); + int numMessagesAnalyzed = randomIntBetween(1, numLinesAnalyzed); + builder.setNumMessagesAnalyzed(numMessagesAnalyzed); + builder.setSampleStart(randomAlphaOfLength(1000)); + + String charset = randomFrom(Charset.availableCharsets().keySet()); + builder.setCharset(charset); + if (charset.toUpperCase(Locale.ROOT).startsWith("UTF")) { + builder.setHasByteOrderMarker(randomBoolean()); + } + + if (numMessagesAnalyzed < numLinesAnalyzed) { + builder.setMultilineStartPattern(randomAlphaOfLength(100)); + } + if (randomBoolean()) { + builder.setExcludeLinesPattern(randomAlphaOfLength(100)); + } + + if (format.isSeparatedValues() || (format.supportsNesting() && randomBoolean())) { + builder.setInputFields(Arrays.asList(generateRandomStringArray(10, 10, false, false))); + } + if (format.isSeparatedValues()) { + builder.setHasHeaderRow(randomBoolean()); + if (rarely()) { + builder.setSeparator(format.separator()); + } + } + if (format.isSemiStructured()) { + builder.setGrokPattern(randomAlphaOfLength(100)); + } + + if (format.isSemiStructured() || randomBoolean()) { + builder.setTimestampField(randomAlphaOfLength(10)); + builder.setTimestampFormats(Arrays.asList(generateRandomStringArray(3, 20, false, false))); + builder.setNeedClientTimezone(randomBoolean()); + } + + Map<String, Object> mappings = new TreeMap<>(); + for (String field : generateRandomStringArray(5, 20, false, false)) { + mappings.put(field, Collections.singletonMap(randomAlphaOfLength(5), randomAlphaOfLength(10))); + } + builder.setMappings(mappings); + + builder.setExplanation(Arrays.asList(generateRandomStringArray(10, 150, false, false))); + + return builder.build(); + } + + protected LogStructure doParseInstance(XContentParser parser) { + return LogStructure.PARSER.apply(parser, null).build(); + } + + protected boolean supportsUnknownFields() { + return false; + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtilsTests.java
b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtilsTests.java new file mode 100644 index 0000000000000..7e92728f01aa0 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureUtilsTests.java @@ -0,0 +1,292 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.xpack.ml.logstructurefinder.TimestampFormatFinder.TimestampMatch; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; + +import static org.hamcrest.Matchers.contains; + +public class LogStructureUtilsTests extends LogStructureTestCase { + + public void testMoreLikelyGivenText() { + assertTrue(LogStructureUtils.isMoreLikelyTextThanKeyword("the quick brown fox jumped over the lazy dog")); + assertTrue(LogStructureUtils.isMoreLikelyTextThanKeyword(randomAlphaOfLengthBetween(257, 10000))); + } + + public void testMoreLikelyGivenKeyword() { + assertFalse(LogStructureUtils.isMoreLikelyTextThanKeyword("1")); + assertFalse(LogStructureUtils.isMoreLikelyTextThanKeyword("DEBUG")); + assertFalse(LogStructureUtils.isMoreLikelyTextThanKeyword(randomAlphaOfLengthBetween(1, 256))); + } + + public void testSingleSampleSingleField() { + Map<String, ?> sample = Collections.singletonMap("field1", "2018-05-24T17:28:31,735"); + Tuple<String, TimestampMatch> match = + LogStructureUtils.guessTimestampField(explanation, Collections.singletonList(sample)); + assertNotNull(match); + assertEquals("field1", match.v1()); + assertThat(match.v2().dateFormats, contains("ISO8601")); + assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName); + } + + public void testSamplesWithSameSingleTimeField() { + Map<String, ?> sample1 = Collections.singletonMap("field1", "2018-05-24T17:28:31,735"); + Map<String, ?> sample2 = Collections.singletonMap("field1", "2018-05-24T17:33:39,406"); + Tuple<String, TimestampMatch> match = + LogStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2)); + assertNotNull(match); + assertEquals("field1", match.v1()); + assertThat(match.v2().dateFormats, contains("ISO8601")); + assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName); + } + + public void testSamplesWithOneSingleTimeFieldDifferentFormat() { + Map<String, ?> sample1 = Collections.singletonMap("field1", "2018-05-24T17:28:31,735"); + Map<String, ?> sample2 = Collections.singletonMap("field1", "2018-05-24 17:33:39,406"); + Tuple<String, TimestampMatch> match = + LogStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2)); + assertNull(match); + } + + public void testSamplesWithDifferentSingleTimeField() { + Map<String, ?> sample1 = Collections.singletonMap("field1", "2018-05-24T17:28:31,735"); + Map<String, ?> sample2 = Collections.singletonMap("another_field", "2018-05-24T17:33:39,406"); + Tuple<String, TimestampMatch> match = + LogStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2)); + assertNull(match); + } + + public void testSingleSampleManyFieldsOneTimeFormat() { + Map<String, Object> sample = new LinkedHashMap<>(); + sample.put("foo", "not a time"); + sample.put("time", "2018-05-24 17:28:31,735"); + sample.put("bar", 42); + Tuple<String, TimestampMatch> match = + LogStructureUtils.guessTimestampField(explanation, Collections.singletonList(sample));
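+ // Only the "time" field holds a value that parses as a timestamp, so it should be chosen even though it is not the first field in the sample.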
assertNotNull(match); + assertEquals("time", match.v1()); + assertThat(match.v2().dateFormats, contains("YYYY-MM-dd HH:mm:ss,SSS")); + assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName); + } + + public void testSamplesWithManyFieldsSameSingleTimeFormat() { + Map sample1 = new LinkedHashMap<>(); + sample1.put("foo", "not a time"); + sample1.put("time", "2018-05-24 17:28:31,735"); + sample1.put("bar", 42); + Map sample2 = new LinkedHashMap<>(); + sample2.put("foo", "whatever"); + sample2.put("time", "2018-05-29 11:53:02,837"); + sample2.put("bar", 17); + Tuple match = + LogStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2)); + assertNotNull(match); + assertEquals("time", match.v1()); + assertThat(match.v2().dateFormats, contains("YYYY-MM-dd HH:mm:ss,SSS")); + assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName); + } + + public void testSamplesWithManyFieldsSameTimeFieldDifferentTimeFormat() { + Map sample1 = new LinkedHashMap<>(); + sample1.put("foo", "not a time"); + sample1.put("time", "2018-05-24 17:28:31,735"); + sample1.put("bar", 42); + Map sample2 = new LinkedHashMap<>(); + sample2.put("foo", "whatever"); + sample2.put("time", "May 29 2018 11:53:02"); + sample2.put("bar", 17); + Tuple match = + LogStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2)); + assertNull(match); + } + + public void testSamplesWithManyFieldsSameSingleTimeFormatDistractionBefore() { + Map sample1 = new LinkedHashMap<>(); + sample1.put("red_herring", "May 29 2007 11:53:02"); + sample1.put("time", "2018-05-24 17:28:31,735"); + sample1.put("bar", 42); + Map sample2 = new LinkedHashMap<>(); + sample2.put("red_herring", "whatever"); + sample2.put("time", "2018-05-29 11:53:02,837"); + sample2.put("bar", 17); + Tuple match = + LogStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2)); + assertNotNull(match); + assertEquals("time", match.v1()); + assertThat(match.v2().dateFormats, contains("YYYY-MM-dd HH:mm:ss,SSS")); + assertEquals("TIMESTAMP_ISO8601", match.v2().grokPatternName); + } + + public void testSamplesWithManyFieldsSameSingleTimeFormatDistractionAfter() { + Map sample1 = new LinkedHashMap<>(); + sample1.put("foo", "not a time"); + sample1.put("time", "May 24 2018 17:28:31"); + sample1.put("red_herring", "2018-05-24 17:28:31,735"); + Map sample2 = new LinkedHashMap<>(); + sample2.put("foo", "whatever"); + sample2.put("time", "May 29 2018 11:53:02"); + sample2.put("red_herring", "17"); + Tuple match = + LogStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2)); + assertNotNull(match); + assertEquals("time", match.v1()); + assertThat(match.v2().dateFormats, contains("MMM dd YYYY HH:mm:ss", "MMM d YYYY HH:mm:ss")); + assertEquals("CISCOTIMESTAMP", match.v2().grokPatternName); + } + + public void testSamplesWithManyFieldsInconsistentTimeFields() { + Map sample1 = new LinkedHashMap<>(); + sample1.put("foo", "not a time"); + sample1.put("time1", "May 24 2018 17:28:31"); + sample1.put("bar", 17); + Map sample2 = new LinkedHashMap<>(); + sample2.put("foo", "whatever"); + sample2.put("time2", "May 29 2018 11:53:02"); + sample2.put("bar", 42); + Tuple match = + LogStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2)); + assertNull(match); + } + + public void testSamplesWithManyFieldsInconsistentAndConsistentTimeFields() { + Map sample1 = new LinkedHashMap<>(); + sample1.put("foo", "not a time"); + sample1.put("time1", "2018-05-09 17:28:31,735"); + 
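+ // "time1" and "time3" each appear in only one of the two samples; "time2" is the only
+ // timestamp field common to both, so the assertions at the end of this test expect it
+ // to win even though every candidate on its own looks like a valid timestamp.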
sample1.put("time2", "May 9 2018 17:28:31"); + sample1.put("bar", 17); + Map sample2 = new LinkedHashMap<>(); + sample2.put("foo", "whatever"); + sample2.put("time2", "May 10 2018 11:53:02"); + sample2.put("time3", "Thu, May 10 2018 11:53:02"); + sample2.put("bar", 42); + Tuple match = + LogStructureUtils.guessTimestampField(explanation, Arrays.asList(sample1, sample2)); + assertNotNull(match); + assertEquals("time2", match.v1()); + assertThat(match.v2().dateFormats, contains("MMM dd YYYY HH:mm:ss", "MMM d YYYY HH:mm:ss")); + assertEquals("CISCOTIMESTAMP", match.v2().grokPatternName); + } + + public void testGuessMappingGivenNothing() { + assertNull(LogStructureUtils.guessMapping(explanation, "foo", Collections.emptyList())); + } + + public void testGuessMappingGivenKeyword() { + Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"); + + assertEquals(expected, + LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("ERROR", "INFO", "DEBUG"))); + assertEquals(expected, + LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("2018-06-11T13:26:47Z", "not a date"))); + } + + public void testGuessMappingGivenText() { + Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "text"); + + assertEquals(expected, LogStructureUtils.guessMapping(explanation, "foo", + Arrays.asList("a", "the quick brown fox jumped over the lazy dog"))); + } + + public void testGuessMappingGivenIp() { + Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "ip"); + + assertEquals(expected, + LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("10.0.0.1", "172.16.0.1", "192.168.0.1"))); + } + + public void testGuessMappingGivenDouble() { + Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "double"); + + assertEquals(expected, + LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("3.14159265359", "0", "-8"))); + // 12345678901234567890 is too long for long + assertEquals(expected, + LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("1", "2", "12345678901234567890"))); + assertEquals(expected, + LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList(3.14159265359, 0.0, 1e-308))); + assertEquals(expected, + LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("-1e-1", "-1e308", "1e-308"))); + } + + public void testGuessMappingGivenLong() { + Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "long"); + + assertEquals(expected, + LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("500", "3", "-3"))); + assertEquals(expected, + LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList(500, 6, 0))); + } + + public void testGuessMappingGivenDate() { + Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "date"); + + assertEquals(expected, LogStructureUtils.guessMapping(explanation, "foo", + Arrays.asList("2018-06-11T13:26:47Z", "2018-06-11T13:27:12Z"))); + } + + public void testGuessMappingGivenBoolean() { + Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "boolean"); + + assertEquals(expected, LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList("false", "true"))); + assertEquals(expected, LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList(true, false))); + } + + public void testGuessMappingGivenArray() { + Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "long"); + 
+ assertEquals(expected, + LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList(42, Arrays.asList(1, -99)))); + + expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"); + + assertEquals(expected, + LogStructureUtils.guessMapping(explanation, "foo", Arrays.asList(new String[]{ "x", "y" }, "z"))); + } + + public void testGuessMappingGivenObject() { + Map expected = Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "object"); + + assertEquals(expected, LogStructureUtils.guessMapping(explanation, "foo", + Arrays.asList(Collections.singletonMap("name", "value1"), Collections.singletonMap("name", "value2")))); + } + + public void testGuessMappingGivenObjectAndNonObject() { + RuntimeException e = expectThrows(RuntimeException.class, () -> LogStructureUtils.guessMapping(explanation, + "foo", Arrays.asList(Collections.singletonMap("name", "value1"), "value2"))); + + assertEquals("Field [foo] has both object and non-object values - this is not supported by Elasticsearch", e.getMessage()); + } + + public void testGuessMappings() { + Map sample1 = new LinkedHashMap<>(); + sample1.put("foo", "not a time"); + sample1.put("time", "2018-05-24 17:28:31,735"); + sample1.put("bar", 42); + sample1.put("nothing", null); + Map sample2 = new LinkedHashMap<>(); + sample2.put("foo", "whatever"); + sample2.put("time", "2018-05-29 11:53:02,837"); + sample2.put("bar", 17); + sample2.put("nothing", null); + + Map mappings = LogStructureUtils.guessMappings(explanation, Arrays.asList(sample1, sample2)); + assertNotNull(mappings); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "keyword"), mappings.get("foo")); + Map expectedTimeMapping = new HashMap<>(); + expectedTimeMapping.put(LogStructureUtils.MAPPING_TYPE_SETTING, "date"); + expectedTimeMapping.put(LogStructureUtils.MAPPING_FORMAT_SETTING, "YYYY-MM-dd HH:mm:ss,SSS"); + assertEquals(expectedTimeMapping, mappings.get("time")); + assertEquals(Collections.singletonMap(LogStructureUtils.MAPPING_TYPE_SETTING, "long"), mappings.get("bar")); + assertNull(mappings.get("nothing")); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/PipeSeparatedValuesLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/PipeSeparatedValuesLogStructureFinderFactoryTests.java new file mode 100644 index 0000000000000..3fd2fb7840ac9 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/PipeSeparatedValuesLogStructureFinderFactoryTests.java @@ -0,0 +1,23 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +public class PipeSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase { + + private LogStructureFinderFactory factory = new PipeSeparatedValuesLogStructureFinderFactory(); + + // No need to check JSON, XML, CSV, TSV or semi-colon separated values because they come earlier in the order we check formats + + public void testCanCreateFromSampleGivenPipeSeparatedValues() { + + assertTrue(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE)); + } + + public void testCanCreateFromSampleGivenText() { + + assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE)); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/SemiColonSeparatedValuesLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/SemiColonSeparatedValuesLogStructureFinderFactoryTests.java new file mode 100644 index 0000000000000..64dad7e078cdf --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/SemiColonSeparatedValuesLogStructureFinderFactoryTests.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +public class SemiColonSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase { + + private LogStructureFinderFactory factory = new SemiColonSeparatedValuesLogStructureFinderFactory(); + + // No need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats + + public void testCanCreateFromSampleGivenSemiColonSeparatedValues() { + + assertTrue(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE)); + } + + public void testCanCreateFromSampleGivenPipeSeparatedValues() { + + assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE)); + } + + public void testCanCreateFromSampleGivenText() { + + assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE)); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/SeparatedValuesLogStructureFinderTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/SeparatedValuesLogStructureFinderTests.java new file mode 100644 index 0000000000000..b62832a0a19cb --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/SeparatedValuesLogStructureFinderTests.java @@ -0,0 +1,293 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.collect.Tuple; +import org.supercsv.prefs.CsvPreference; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; + +import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinFieldwiseCompareRows; +import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinDistance; +import static org.hamcrest.Matchers.arrayContaining; + +public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase { + + private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory(); + + public void testCreateConfigsGivenCompleteCsv() throws Exception { + String sample = "time,message\n" + + "2018-05-17T13:41:23,hello\n" + + "2018-05-17T13:41:32,hello again\n"; + assertTrue(factory.canCreateFromSample(explanation, sample)); + + String charset = randomFrom(POSSIBLE_CHARSETS); + Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); + LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker); + + LogStructure structure = structureFinder.getStructure(); + + assertEquals(LogStructure.Format.CSV, structure.getFormat()); + assertEquals(charset, structure.getCharset()); + if (hasByteOrderMarker == null) { + assertNull(structure.getHasByteOrderMarker()); + } else { + assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker()); + } + assertEquals("^\"?time\"?,\"?message\"?", structure.getExcludeLinesPattern()); + assertEquals("^\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern()); + assertEquals(Character.valueOf(','), structure.getSeparator()); + assertTrue(structure.getHasHeaderRow()); + assertNull(structure.getShouldTrimFields()); + assertEquals(Arrays.asList("time", "message"), structure.getInputFields()); + assertNull(structure.getGrokPattern()); + assertEquals("time", structure.getTimestampField()); + assertEquals(Collections.singletonList("ISO8601"), structure.getTimestampFormats()); + } + + public void testCreateConfigsGivenCsvWithIncompleteLastRecord() throws Exception { + String sample = "message,time,count\n" + + "\"hello\n" + + "world\",2018-05-17T13:41:23,1\n" + + "\"hello again\n"; // note that this last record is truncated + assertTrue(factory.canCreateFromSample(explanation, sample)); + + String charset = randomFrom(POSSIBLE_CHARSETS); + Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); + LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker); + + LogStructure structure = structureFinder.getStructure(); + + assertEquals(LogStructure.Format.CSV, structure.getFormat()); + assertEquals(charset, structure.getCharset()); + if (hasByteOrderMarker == null) { + assertNull(structure.getHasByteOrderMarker()); + } else { + assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker()); + } + assertEquals("^\"?message\"?,\"?time\"?,\"?count\"?", structure.getExcludeLinesPattern()); + assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern()); + assertEquals(Character.valueOf(','), structure.getSeparator()); + assertTrue(structure.getHasHeaderRow()); + assertNull(structure.getShouldTrimFields()); + assertEquals(Arrays.asList("message", "time", "count"), structure.getInputFields()); + assertNull(structure.getGrokPattern()); + assertEquals("time", 
structure.getTimestampField()); + assertEquals(Collections.singletonList("ISO8601"), structure.getTimestampFormats()); + } + + public void testCreateConfigsGivenCsvWithTrailingNulls() throws Exception { + String sample = "VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,RatecodeID," + + "store_and_fwd_flag,PULocationID,DOLocationID,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount," + + "improvement_surcharge,total_amount,,\n" + + "2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" + + "1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" + + "1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n"; + assertTrue(factory.canCreateFromSample(explanation, sample)); + + String charset = randomFrom(POSSIBLE_CHARSETS); + Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); + LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker); + + LogStructure structure = structureFinder.getStructure(); + + assertEquals(LogStructure.Format.CSV, structure.getFormat()); + assertEquals(charset, structure.getCharset()); + if (hasByteOrderMarker == null) { + assertNull(structure.getHasByteOrderMarker()); + } else { + assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker()); + } + assertEquals("^\"?VendorID\"?,\"?tpep_pickup_datetime\"?,\"?tpep_dropoff_datetime\"?,\"?passenger_count\"?,\"?trip_distance\"?," + + "\"?RatecodeID\"?,\"?store_and_fwd_flag\"?,\"?PULocationID\"?,\"?DOLocationID\"?,\"?payment_type\"?,\"?fare_amount\"?," + + "\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?,\"?\"?,\"?\"?", + structure.getExcludeLinesPattern()); + assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern()); + assertEquals(Character.valueOf(','), structure.getSeparator()); + assertTrue(structure.getHasHeaderRow()); + assertNull(structure.getShouldTrimFields()); + assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance", + "RatecodeID", "store_and_fwd_flag", "PULocationID", "DOLocationID", "payment_type", "fare_amount", "extra", "mta_tax", + "tip_amount", "tolls_amount", "improvement_surcharge", "total_amount", "column18", "column19"), structure.getInputFields()); + assertNull(structure.getGrokPattern()); + assertEquals("tpep_pickup_datetime", structure.getTimestampField()); + assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getTimestampFormats()); + } + + public void testCreateConfigsGivenCsvWithTrailingNullsExceptHeader() throws Exception { + String sample = "VendorID,tpep_pickup_datetime,tpep_dropoff_datetime,passenger_count,trip_distance,RatecodeID," + + "store_and_fwd_flag,PULocationID,DOLocationID,payment_type,fare_amount,extra,mta_tax,tip_amount,tolls_amount," + + "improvement_surcharge,total_amount\n" + + "2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" + + "1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" + + "1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n"; + assertTrue(factory.canCreateFromSample(explanation, sample)); + + String charset = randomFrom(POSSIBLE_CHARSETS); + Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); + LogStructureFinder 
structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker); + + LogStructure structure = structureFinder.getStructure(); + + assertEquals(LogStructure.Format.CSV, structure.getFormat()); + assertEquals(charset, structure.getCharset()); + if (hasByteOrderMarker == null) { + assertNull(structure.getHasByteOrderMarker()); + } else { + assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker()); + } + assertEquals("^\"?VendorID\"?,\"?tpep_pickup_datetime\"?,\"?tpep_dropoff_datetime\"?,\"?passenger_count\"?,\"?trip_distance\"?," + + "\"?RatecodeID\"?,\"?store_and_fwd_flag\"?,\"?PULocationID\"?,\"?DOLocationID\"?,\"?payment_type\"?,\"?fare_amount\"?," + + "\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?", + structure.getExcludeLinesPattern()); + assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern()); + assertEquals(Character.valueOf(','), structure.getSeparator()); + assertTrue(structure.getHasHeaderRow()); + assertNull(structure.getShouldTrimFields()); + assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance", + "RatecodeID", "store_and_fwd_flag", "PULocationID", "DOLocationID", "payment_type", "fare_amount", "extra", "mta_tax", + "tip_amount", "tolls_amount", "improvement_surcharge", "total_amount"), structure.getInputFields()); + assertNull(structure.getGrokPattern()); + assertEquals("tpep_pickup_datetime", structure.getTimestampField()); + assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getTimestampFormats()); + } + + public void testCreateConfigsGivenCsvWithTimeLastColumn() throws Exception { + String sample = "\"pos_id\",\"trip_id\",\"latitude\",\"longitude\",\"altitude\",\"timestamp\"\n" + + "\"1\",\"3\",\"4703.7815\",\"1527.4713\",\"359.9\",\"2017-01-19 16:19:04.742113\"\n" + + "\"2\",\"3\",\"4703.7815\",\"1527.4714\",\"359.9\",\"2017-01-19 16:19:05.741890\"\n"; + assertTrue(factory.canCreateFromSample(explanation, sample)); + + String charset = randomFrom(POSSIBLE_CHARSETS); + Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); + LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker); + + LogStructure structure = structureFinder.getStructure(); + + assertEquals(LogStructure.Format.CSV, structure.getFormat()); + assertEquals(charset, structure.getCharset()); + if (hasByteOrderMarker == null) { + assertNull(structure.getHasByteOrderMarker()); + } else { + assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker()); + } + assertEquals("^\"?pos_id\"?,\"?trip_id\"?,\"?latitude\"?,\"?longitude\"?,\"?altitude\"?,\"?timestamp\"?", + structure.getExcludeLinesPattern()); + assertNull(structure.getMultilineStartPattern()); + assertEquals(Character.valueOf(','), structure.getSeparator()); + assertTrue(structure.getHasHeaderRow()); + assertNull(structure.getShouldTrimFields()); + assertEquals(Arrays.asList("pos_id", "trip_id", "latitude", "longitude", "altitude", "timestamp"), structure.getInputFields()); + assertNull(structure.getGrokPattern()); + assertEquals("timestamp", structure.getTimestampField()); + assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss.SSSSSS"), structure.getTimestampFormats()); + } + + public void testFindHeaderFromSampleGivenHeaderInSample() throws IOException { + String withHeader = "time,airline,responsetime,sourcetype\n" + + "2014-06-23 
00:00:00Z,AAL,132.2046,farequote\n" + + "2014-06-23 00:00:00Z,JZA,990.4628,farequote\n" + + "2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" + + "2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n"; + + Tuple<Boolean, String[]> header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation, + SeparatedValuesLogStructureFinder.readRows(withHeader, CsvPreference.EXCEL_PREFERENCE).v1()); + + assertTrue(header.v1()); + assertThat(header.v2(), arrayContaining("time", "airline", "responsetime", "sourcetype")); + } + + public void testFindHeaderFromSampleGivenHeaderNotInSample() throws IOException { + String withoutHeader = "2014-06-23 00:00:00Z,AAL,132.2046,farequote\n" + + "2014-06-23 00:00:00Z,JZA,990.4628,farequote\n" + + "2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" + + "2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n"; + + Tuple<Boolean, String[]> header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation, + SeparatedValuesLogStructureFinder.readRows(withoutHeader, CsvPreference.EXCEL_PREFERENCE).v1()); + + assertFalse(header.v1()); + assertThat(header.v2(), arrayContaining("column1", "column2", "column3", "column4")); + } + + public void testLevenshteinDistance() { + + assertEquals(0, levenshteinDistance("cat", "cat")); + assertEquals(3, levenshteinDistance("cat", "dog")); + assertEquals(5, levenshteinDistance("cat", "mouse")); + assertEquals(3, levenshteinDistance("cat", "")); + + assertEquals(3, levenshteinDistance("dog", "cat")); + assertEquals(0, levenshteinDistance("dog", "dog")); + assertEquals(4, levenshteinDistance("dog", "mouse")); + assertEquals(3, levenshteinDistance("dog", "")); + + assertEquals(5, levenshteinDistance("mouse", "cat")); + assertEquals(4, levenshteinDistance("mouse", "dog")); + assertEquals(0, levenshteinDistance("mouse", "mouse")); + assertEquals(5, levenshteinDistance("mouse", "")); + + assertEquals(3, levenshteinDistance("", "cat")); + assertEquals(3, levenshteinDistance("", "dog")); + assertEquals(5, levenshteinDistance("", "mouse")); + assertEquals(0, levenshteinDistance("", "")); + } + + public void testLevenshteinCompareRows() { + + assertEquals(0, levenshteinFieldwiseCompareRows(Arrays.asList("cat", "dog"), Arrays.asList("cat", "dog"))); + assertEquals(0, levenshteinFieldwiseCompareRows(Arrays.asList("cat", "dog"), Arrays.asList("cat", "cat"))); + assertEquals(3, levenshteinFieldwiseCompareRows(Arrays.asList("cat", "dog"), Arrays.asList("dog", "cat"))); + assertEquals(3, levenshteinFieldwiseCompareRows(Arrays.asList("cat", "dog"), Arrays.asList("mouse", "cat"))); + assertEquals(5, levenshteinFieldwiseCompareRows(Arrays.asList("cat", "dog", "mouse"), Arrays.asList("mouse", "dog", "cat"))); + assertEquals(4, levenshteinFieldwiseCompareRows(Arrays.asList("cat", "dog", "mouse"), Arrays.asList("mouse", "mouse", "mouse"))); + assertEquals(7, levenshteinFieldwiseCompareRows(Arrays.asList("cat", "dog", "mouse"), Arrays.asList("mouse", "cat", "dog"))); + } + + public void testLineHasUnescapedQuote() { + + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,c", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\",b,c", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b\",c", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b,c\"", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\",c", CsvPreference.EXCEL_PREFERENCE)); +
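+ // In these CSV dialects a quote inside a quoted field is escaped by doubling it, so the
+ // doubled-quote cases below are legal; only a bare quote inside an unquoted token, as in
+ // the last two cases of each group, should be reported as unescaped.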
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\"\"\",c", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"\"\"", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\",b,c", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\",b,c", CsvPreference.EXCEL_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,\"\"b\",c", CsvPreference.EXCEL_PREFERENCE)); + assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words,b,c", CsvPreference.EXCEL_PREFERENCE)); + assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\",b,c", CsvPreference.EXCEL_PREFERENCE)); + + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\"\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\tc\"", CsvPreference.TAB_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"", CsvPreference.TAB_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\"\"\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"\"\"", CsvPreference.TAB_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\"\tb\tc", CsvPreference.TAB_PREFERENCE)); + assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\t\"\"b\"\tc", CsvPreference.TAB_PREFERENCE)); + assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words\tb\tc", CsvPreference.TAB_PREFERENCE)); + assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\"\tb\tc", CsvPreference.TAB_PREFERENCE)); + } + + public void testRowContainsDuplicateNonEmptyValues() { + + assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("a"))); + assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList(""))); + assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "c"))); + assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a"))); + assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "b"))); + assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", ""))); + assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("", "a", ""))); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderFactoryTests.java 
b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderFactoryTests.java new file mode 100644 index 0000000000000..267ce375d6e94 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderFactoryTests.java @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +public class TextLogStructureFinderFactoryTests extends LogStructureTestCase { + + private LogStructureFinderFactory factory = new TextLogStructureFinderFactory(); + + // No need to check JSON, XML, CSV, TSV, semi-colon separated values or pipe + // separated values because they come earlier in the order we check formats + + public void testCanCreateFromSampleGivenText() { + + assertTrue(factory.canCreateFromSample(explanation, TEXT_SAMPLE)); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderTests.java new file mode 100644 index 0000000000000..7c6a58bb68387 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinderTests.java @@ -0,0 +1,245 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.common.util.set.Sets; +import org.elasticsearch.xpack.ml.logstructurefinder.TimestampFormatFinder.TimestampMatch; + +import java.util.Collections; +import java.util.Set; + +public class TextLogStructureFinderTests extends LogStructureTestCase { + + private LogStructureFinderFactory factory = new TextLogStructureFinderFactory(); + + public void testCreateConfigsGivenElasticsearchLog() throws Exception { + assertTrue(factory.canCreateFromSample(explanation, TEXT_SAMPLE)); + + String charset = randomFrom(POSSIBLE_CHARSETS); + Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); + LogStructureFinder structureFinder = factory.createFromSample(explanation, TEXT_SAMPLE, charset, hasByteOrderMarker); + + LogStructure structure = structureFinder.getStructure(); + + assertEquals(LogStructure.Format.SEMI_STRUCTURED_TEXT, structure.getFormat()); + assertEquals(charset, structure.getCharset()); + if (hasByteOrderMarker == null) { + assertNull(structure.getHasByteOrderMarker()); + } else { + assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker()); + } + assertNull(structure.getExcludeLinesPattern()); + assertEquals("^\\[\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern()); + assertNull(structure.getSeparator()); + assertNull(structure.getHasHeaderRow()); + assertNull(structure.getShouldTrimFields()); + assertEquals("\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*", structure.getGrokPattern()); + assertEquals("timestamp", structure.getTimestampField()); + assertEquals(Collections.singletonList("ISO8601"), structure.getTimestampFormats()); + } + + public void testCreateMultiLineMessageStartRegexGivenNoPrefaces() { + for (TimestampFormatFinder.CandidateTimestampFormat candidateTimestampFormat : TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS) { + String simpleDateRegex = candidateTimestampFormat.simplePattern.pattern(); + assertEquals("^" + simpleDateRegex.replaceFirst("^\\\\b", ""), + TextLogStructureFinder.createMultiLineMessageStartRegex(Collections.emptySet(), simpleDateRegex)); + } + } + + public void testCreateMultiLineMessageStartRegexGivenOneEmptyPreface() { + for (TimestampFormatFinder.CandidateTimestampFormat candidateTimestampFormat : TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS) { + String simpleDateRegex = candidateTimestampFormat.simplePattern.pattern(); + assertEquals("^" + simpleDateRegex.replaceFirst("^\\\\b", ""), + TextLogStructureFinder.createMultiLineMessageStartRegex(Collections.singleton(""), simpleDateRegex)); + } + } + + public void testCreateMultiLineMessageStartRegexGivenOneLogLevelPreface() { + for (TimestampFormatFinder.CandidateTimestampFormat candidateTimestampFormat : TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS) { + String simpleDateRegex = candidateTimestampFormat.simplePattern.pattern(); + assertEquals("^\\[.*?\\] \\[" + simpleDateRegex, + TextLogStructureFinder.createMultiLineMessageStartRegex(Collections.singleton("[ERROR] ["), simpleDateRegex)); + } + } + + public void testCreateMultiLineMessageStartRegexGivenManyLogLevelPrefaces() { + for (TimestampFormatFinder.CandidateTimestampFormat candidateTimestampFormat : TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS) { + Set prefaces = Sets.newHashSet("[ERROR] [", "[DEBUG] ["); + String simpleDateRegex = candidateTimestampFormat.simplePattern.pattern(); + assertEquals("^\\[.*?\\] \\[" + simpleDateRegex, + 
TextLogStructureFinder.createMultiLineMessageStartRegex(prefaces, simpleDateRegex)); + } + } + + public void testCreateMultiLineMessageStartRegexGivenManyHostnamePrefaces() { + for (TimestampFormatFinder.CandidateTimestampFormat candidateTimestampFormat : TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS) { + Set prefaces = Sets.newHashSet("host-1.acme.com|", "my_host.elastic.co|"); + String simpleDateRegex = candidateTimestampFormat.simplePattern.pattern(); + assertEquals("^.*?\\|" + simpleDateRegex, + TextLogStructureFinder.createMultiLineMessageStartRegex(prefaces, simpleDateRegex)); + } + } + + public void testCreateMultiLineMessageStartRegexGivenManyPrefacesIncludingEmpty() { + for (TimestampFormatFinder.CandidateTimestampFormat candidateTimestampFormat : TimestampFormatFinder.ORDERED_CANDIDATE_FORMATS) { + Set prefaces = Sets.newHashSet("", "[non-standard] "); + String simpleDateRegex = candidateTimestampFormat.simplePattern.pattern(); + assertEquals("^.*?" + simpleDateRegex, + TextLogStructureFinder.createMultiLineMessageStartRegex(prefaces, simpleDateRegex)); + } + } + + public void testMostLikelyTimestampGivenAllSame() { + String sample = "[2018-06-27T11:59:22,125][INFO ][o.e.n.Node ] [node-0] initializing ...\n" + + "[2018-06-27T11:59:22,201][INFO ][o.e.e.NodeEnvironment ] [node-0] using [1] data paths, mounts [[/ (/dev/disk1)]], " + + "net usable_space [216.1gb], net total_space [464.7gb], types [hfs]\n" + + "[2018-06-27T11:59:22,202][INFO ][o.e.e.NodeEnvironment ] [node-0] heap size [494.9mb], " + + "compressed ordinary object pointers [true]\n" + + "[2018-06-27T11:59:22,204][INFO ][o.e.n.Node ] [node-0] node name [node-0], node ID [Ha1gD8nNSDqjd6PIyu3DJA]\n" + + "[2018-06-27T11:59:22,204][INFO ][o.e.n.Node ] [node-0] version[6.4.0-SNAPSHOT], pid[2785], " + + "build[default/zip/3c60efa/2018-06-26T14:55:15.206676Z], OS[Mac OS X/10.12.6/x86_64], " + + "JVM[\"Oracle Corporation\"/Java HotSpot(TM) 64-Bit Server VM/10/10+46]\n" + + "[2018-06-27T11:59:22,205][INFO ][o.e.n.Node ] [node-0] JVM arguments [-Xms1g, -Xmx1g, " + + "-XX:+UseConcMarkSweepGC, -XX:CMSInitiatingOccupancyFraction=75, -XX:+UseCMSInitiatingOccupancyOnly, " + + "-XX:+AlwaysPreTouch, -Xss1m, -Djava.awt.headless=true, -Dfile.encoding=UTF-8, -Djna.nosys=true, " + + "-XX:-OmitStackTraceInFastThrow, -Dio.netty.noUnsafe=true, -Dio.netty.noKeySetOptimization=true, " + + "-Dio.netty.recycler.maxCapacityPerThread=0, -Dlog4j.shutdownHookEnabled=false, -Dlog4j2.disable.jmx=true, " + + "-Djava.io.tmpdir=/var/folders/k5/5sqcdlps5sg3cvlp783gcz740000h0/T/elasticsearch.nFUyeMH1, " + + "-XX:+HeapDumpOnOutOfMemoryError, -XX:HeapDumpPath=data, -XX:ErrorFile=logs/hs_err_pid%p.log, " + + "-Xlog:gc*,gc+age=trace,safepoint:file=logs/gc.log:utctime,pid,tags:filecount=32,filesize=64m, " + + "-Djava.locale.providers=COMPAT, -Dio.netty.allocator.type=unpooled, -ea, -esa, -Xms512m, -Xmx512m, " + + "-Des.path.home=/Users/dave/elasticsearch/distribution/build/cluster/run node0/elasticsearch-6.4.0-SNAPSHOT, " + + "-Des.path.conf=/Users/dave/elasticsearch/distribution/build/cluster/run node0/elasticsearch-6.4.0-SNAPSHOT/config, " + + "-Des.distribution.flavor=default, -Des.distribution.type=zip]\n" + + "[2018-06-27T11:59:22,205][WARN ][o.e.n.Node ] [node-0] version [6.4.0-SNAPSHOT] is a pre-release version of " + + "Elasticsearch and is not suitable for production\n" + + "[2018-06-27T11:59:23,585][INFO ][o.e.p.PluginsService ] [node-0] loaded module [aggs-matrix-stats]\n" + + "[2018-06-27T11:59:23,586][INFO ][o.e.p.PluginsService ] [node-0] loaded 
module [analysis-common]\n" + + "[2018-06-27T11:59:23,586][INFO ][o.e.p.PluginsService ] [node-0] loaded module [ingest-common]\n" + + "[2018-06-27T11:59:23,586][INFO ][o.e.p.PluginsService ] [node-0] loaded module [lang-expression]\n" + + "[2018-06-27T11:59:23,586][INFO ][o.e.p.PluginsService ] [node-0] loaded module [lang-mustache]\n" + + "[2018-06-27T11:59:23,586][INFO ][o.e.p.PluginsService ] [node-0] loaded module [lang-painless]\n" + + "[2018-06-27T11:59:23,586][INFO ][o.e.p.PluginsService ] [node-0] loaded module [mapper-extras]\n" + + "[2018-06-27T11:59:23,586][INFO ][o.e.p.PluginsService ] [node-0] loaded module [parent-join]\n" + + "[2018-06-27T11:59:23,586][INFO ][o.e.p.PluginsService ] [node-0] loaded module [percolator]\n" + + "[2018-06-27T11:59:23,586][INFO ][o.e.p.PluginsService ] [node-0] loaded module [rank-eval]\n" + + "[2018-06-27T11:59:23,586][INFO ][o.e.p.PluginsService ] [node-0] loaded module [reindex]\n" + + "[2018-06-27T11:59:23,586][INFO ][o.e.p.PluginsService ] [node-0] loaded module [repository-url]\n" + + "[2018-06-27T11:59:23,587][INFO ][o.e.p.PluginsService ] [node-0] loaded module [transport-netty4]\n" + + "[2018-06-27T11:59:23,587][INFO ][o.e.p.PluginsService ] [node-0] loaded module [x-pack-core]\n" + + "[2018-06-27T11:59:23,587][INFO ][o.e.p.PluginsService ] [node-0] loaded module [x-pack-deprecation]\n" + + "[2018-06-27T11:59:23,587][INFO ][o.e.p.PluginsService ] [node-0] loaded module [x-pack-graph]\n" + + "[2018-06-27T11:59:23,587][INFO ][o.e.p.PluginsService ] [node-0] loaded module [x-pack-logstash]\n" + + "[2018-06-27T11:59:23,587][INFO ][o.e.p.PluginsService ] [node-0] loaded module [x-pack-ml]\n" + + "[2018-06-27T11:59:23,587][INFO ][o.e.p.PluginsService ] [node-0] loaded module [x-pack-monitoring]\n" + + "[2018-06-27T11:59:23,587][INFO ][o.e.p.PluginsService ] [node-0] loaded module [x-pack-rollup]\n" + + "[2018-06-27T11:59:23,587][INFO ][o.e.p.PluginsService ] [node-0] loaded module [x-pack-security]\n" + + "[2018-06-27T11:59:23,587][INFO ][o.e.p.PluginsService ] [node-0] loaded module [x-pack-sql]\n" + + "[2018-06-27T11:59:23,588][INFO ][o.e.p.PluginsService ] [node-0] loaded module [x-pack-upgrade]\n" + + "[2018-06-27T11:59:23,588][INFO ][o.e.p.PluginsService ] [node-0] loaded module [x-pack-watcher]\n" + + "[2018-06-27T11:59:23,588][INFO ][o.e.p.PluginsService ] [node-0] no plugins loaded\n"; + + Tuple<TimestampMatch, Set<String>> mostLikelyMatch = TextLogStructureFinder.mostLikelyTimestamp(sample.split("\n")); + assertNotNull(mostLikelyMatch); + assertEquals(new TimestampMatch(7, "", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", ""), + mostLikelyMatch.v1()); + } + + public void testMostLikelyTimestampGivenExceptionTrace() { + String sample = "[2018-02-28T14:49:40,517][DEBUG][o.e.a.b.TransportShardBulkAction] [an_index][2] failed to execute bulk item " + + "(index) BulkShardRequest [[an_index][2]] containing [33] requests\n" + + "java.lang.IllegalArgumentException: Document contains at least one immense term in field=\"message.keyword\" (whose UTF8 " + + "encoding is longer than the max length 32766), all of which were skipped. Please correct the analyzer to not produce " + + "such terms.
The prefix of the first immense term is: '[60, 83, 79, 65, 80, 45, 69, 78, 86, 58, 69, 110, 118, 101, 108, " + + "111, 112, 101, 32, 120, 109, 108, 110, 115, 58, 83, 79, 65, 80, 45]...', original message: bytes can be at most 32766 " + + "in length; got 49023\n" + + "\tat org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:796) " + + "~[lucene-core-7.2.1.jar:7.2.1 b2b6438b37073bee1fca40374e85bf91aa457c0b - ubuntu - 2018-01-10 00:48:43]\n" + + "\tat org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:430) " + + "~[lucene-core-7.2.1.jar:7.2.1 b2b6438b37073bee1fca40374e85bf91aa457c0b - ubuntu - 2018-01-10 00:48:43]\n" + + "\tat org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:392) " + + "~[lucene-core-7.2.1.jar:7.2.1 b2b6438b37073bee1fca40374e85bf91aa457c0b - ubuntu - 2018-01-10 00:48:43]\n" + + "\tat org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:240) " + + "~[lucene-core-7.2.1.jar:7.2.1 b2b6438b37073bee1fca40374e85bf91aa457c0b - ubuntu - 2018-01-10 00:48:43]\n" + + "\tat org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:496) " + + "~[lucene-core-7.2.1.jar:7.2.1 b2b6438b37073bee1fca40374e85bf91aa457c0b - ubuntu - 2018-01-10 00:48:43]\n" + + "\tat org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1729) " + + "~[lucene-core-7.2.1.jar:7.2.1 b2b6438b37073bee1fca40374e85bf91aa457c0b - ubuntu - 2018-01-10 00:48:43]\n" + + "\tat org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1464) " + + "~[lucene-core-7.2.1.jar:7.2.1 b2b6438b37073bee1fca40374e85bf91aa457c0b - ubuntu - 2018-01-10 00:48:43]\n" + + "\tat org.elasticsearch.index.engine.InternalEngine.index(InternalEngine.java:1070) ~[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.index.engine.InternalEngine.indexIntoLucene(InternalEngine.java:1012) " + + "~[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.index.engine.InternalEngine.index(InternalEngine.java:878) ~[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.index.shard.IndexShard.index(IndexShard.java:738) ~[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.index.shard.IndexShard.applyIndexOperation(IndexShard.java:707) ~[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.index.shard.IndexShard.applyIndexOperationOnPrimary(IndexShard.java:673) " + + "~[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.bulk.TransportShardBulkAction.executeIndexRequestOnPrimary(TransportShardBulkAction.java:548) " + + "~[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.bulk.TransportShardBulkAction.executeIndexRequest(TransportShardBulkAction.java:140) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.bulk.TransportShardBulkAction.executeBulkItemRequest(TransportShardBulkAction.java:236) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.bulk.TransportShardBulkAction.performOnPrimary(TransportShardBulkAction.java:123) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.bulk.TransportShardBulkAction.shardOperationOnPrimary(TransportShardBulkAction.java:110) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.bulk.TransportShardBulkAction.shardOperationOnPrimary(TransportShardBulkAction.java:72) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat 
org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryShardReference.perform" + + "(TransportReplicationAction.java:1034) [elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryShardReference.perform" + + "(TransportReplicationAction.java:1012) [elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.support.replication.ReplicationOperation.execute(ReplicationOperation.java:103) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.support.replication.TransportReplicationAction$AsyncPrimaryAction.onResponse" + + "(TransportReplicationAction.java:359) [elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.support.replication.TransportReplicationAction$AsyncPrimaryAction.onResponse" + + "(TransportReplicationAction.java:299) [elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.support.replication.TransportReplicationAction$1.onResponse" + + "(TransportReplicationAction.java:975) [elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.support.replication.TransportReplicationAction$1.onResponse" + + "(TransportReplicationAction.java:972) [elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.index.shard.IndexShardOperationPermits.acquire(IndexShardOperationPermits.java:238) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.index.shard.IndexShard.acquirePrimaryOperationPermit(IndexShard.java:2220) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.support.replication.TransportReplicationAction.acquirePrimaryShardReference" + + "(TransportReplicationAction.java:984) [elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.support.replication.TransportReplicationAction.access$500(TransportReplicationAction.java:98) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.support.replication.TransportReplicationAction$AsyncPrimaryAction.doRun" + + "(TransportReplicationAction.java:320) [elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryOperationTransportHandler" + + ".messageReceived(TransportReplicationAction.java:295) [elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryOperationTransportHandler" + + ".messageReceived(TransportReplicationAction.java:282) [elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:66) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.transport.TransportService$7.doRun(TransportService.java:656) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingAbstractRunnable.doRun(ThreadContext.java:635) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) " + + "[elasticsearch-6.2.1.jar:6.2.1]\n" + + "\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_144]\n" + + "\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_144]\n" + + "\tat java.lang.Thread.run(Thread.java:748) [?:1.8.0_144]\n"; + + 
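+ // A deliberately hostile sample: several of the "\tat" frames embed Lucene build dates
+ // such as "2018-01-10 00:48:43", so a naive vote across all matching lines could favour
+ // the wrong format. The expectation below relies on matches near the start of a line
+ // outweighing matches far along it.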
Tuple<TimestampMatch, Set<String>> mostLikelyMatch = TextLogStructureFinder.mostLikelyTimestamp(sample.split("\n")); + assertNotNull(mostLikelyMatch); + + // Even though many lines have a timestamp near the end (in the Lucene version information), + // these are so far along the lines that the weight of the timestamp near the beginning of the + // first line should take precedence + assertEquals(new TimestampMatch(7, "", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", ""), + mostLikelyMatch.v1()); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TimestampFormatFinderTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TimestampFormatFinderTests.java new file mode 100644 index 0000000000000..cf1b65d1be234 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TimestampFormatFinderTests.java @@ -0,0 +1,242 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import org.elasticsearch.common.collect.Tuple; +import org.elasticsearch.xpack.ml.logstructurefinder.TimestampFormatFinder.TimestampMatch; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.joda.time.format.ISODateTimeFormat; + +import java.util.Arrays; +import java.util.Locale; + +public class TimestampFormatFinderTests extends LogStructureTestCase { + + public void testFindFirstMatchGivenNoMatch() { + + assertNull(TimestampFormatFinder.findFirstMatch("")); + assertNull(TimestampFormatFinder.findFirstMatch("no timestamps in here")); + assertNull(TimestampFormatFinder.findFirstMatch(":::")); + assertNull(TimestampFormatFinder.findFirstMatch("/+")); + } + + public void testFindFirstMatchGivenOnlyIso8601() { + + TimestampMatch expected = new TimestampMatch(7, "", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", + ""); + + checkAndValidateDateFormat(expected, "2018-05-15T16:14:56,374Z", 1526400896374L); + checkAndValidateDateFormat(expected, "2018-05-15T17:14:56,374+0100", 1526400896374L); + checkAndValidateDateFormat(expected, "2018-05-15T17:14:56,374+01:00", 1526400896374L); + checkAndValidateDateFormat(expected, "2018-05-15T17:14:56,374", 1526400896374L); + checkAndValidateDateFormat(expected, "2018-05-15T16:14:56Z", 1526400896000L); + checkAndValidateDateFormat(expected, "2018-05-15T17:14:56+0100", 1526400896000L); + checkAndValidateDateFormat(expected, "2018-05-15T17:14:56+01:00", 1526400896000L); + checkAndValidateDateFormat(expected, "2018-05-15T17:14:56", 1526400896000L); + + checkAndValidateDateFormat(new TimestampMatch(1, "", "YYYY-MM-dd HH:mm:ss,SSSZ", + "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", ""), "2018-05-15 16:14:56,374Z", + 1526400896374L); + checkAndValidateDateFormat(new TimestampMatch(1, "", "YYYY-MM-dd HH:mm:ss,SSSZ", + "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", ""), "2018-05-15 17:14:56,374+0100", + 1526400896374L); + checkAndValidateDateFormat(new TimestampMatch(2, "", "YYYY-MM-dd HH:mm:ss,SSSZZ", + "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}",
"TIMESTAMP_ISO8601", ""), "2018-05-15 17:14:56,374+01:00", + 1526400896374L); + checkAndValidateDateFormat(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss,SSS", + "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}", "TIMESTAMP_ISO8601", ""), "2018-05-15 17:14:56,374", 1526400896374L); + checkAndValidateDateFormat(new TimestampMatch(4, "", "YYYY-MM-dd HH:mm:ssZ", + "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", ""), "2018-05-15 16:14:56Z", 1526400896000L); + checkAndValidateDateFormat(new TimestampMatch(4, "", "YYYY-MM-dd HH:mm:ssZ", + "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", ""), "2018-05-15 17:14:56+0100", 1526400896000L); + checkAndValidateDateFormat(new TimestampMatch(5, "", "YYYY-MM-dd HH:mm:ssZZ", + "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", ""), "2018-05-15 17:14:56+01:00", 1526400896000L); + checkAndValidateDateFormat(new TimestampMatch(6, "", "YYYY-MM-dd HH:mm:ss", + "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", ""), "2018-05-15 17:14:56", 1526400896000L); + } + + public void testFindFirstMatchGivenOnlyKnownDateFormat() { + + // Note: some of the time formats give millisecond accuracy, some second accuracy and some minute accuracy + + checkAndValidateDateFormat(new TimestampMatch(0, "", "YYYY-MM-dd HH:mm:ss,SSS Z", + "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}", "TOMCAT_DATESTAMP", ""), "2018-05-15 17:14:56,374 +0100", + 1526400896374L); + + checkAndValidateDateFormat(new TimestampMatch(8, "", "EEE MMM dd YYYY HH:mm:ss zzz", + "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{4} \\d{2}:\\d{2}:\\d{2} ", "DATESTAMP_RFC822", ""), + "Tue May 15 2018 16:14:56 UTC", 1526400896000L); + checkAndValidateDateFormat(new TimestampMatch(9, "", "EEE MMM dd YYYY HH:mm zzz", + "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{4} \\d{2}:\\d{2} ", "DATESTAMP_RFC822", ""), + "Tue May 15 2018 16:14 UTC", 1526400840000L); + + checkAndValidateDateFormat(new TimestampMatch(10, "", "EEE, dd MMM YYYY HH:mm:ss ZZ", + "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2}:\\d{2} ", "DATESTAMP_RFC2822", ""), + "Tue, 15 May 2018 17:14:56 +01:00", 1526400896000L); + checkAndValidateDateFormat(new TimestampMatch(11, "", "EEE, dd MMM YYYY HH:mm:ss Z", + "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2}:\\d{2} ", "DATESTAMP_RFC2822", ""), + "Tue, 15 May 2018 17:14:56 +0100", 1526400896000L); + checkAndValidateDateFormat(new TimestampMatch(12, "", "EEE, dd MMM YYYY HH:mm ZZ", + "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2} ", "DATESTAMP_RFC2822", ""), + "Tue, 15 May 2018 17:14 +01:00", 1526400840000L); + checkAndValidateDateFormat(new TimestampMatch(13, "", "EEE, dd MMM YYYY HH:mm Z", + "\\b[A-Z]\\S{2,8}, \\d{1,2} [A-Z]\\S{2,8} \\d{4} \\d{2}:\\d{2} ", "DATESTAMP_RFC2822", ""), "Tue, 15 May 2018 17:14 +0100", + 1526400840000L); + + checkAndValidateDateFormat(new TimestampMatch(14, "", "EEE MMM dd HH:mm:ss zzz YYYY", + "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{2}:\\d{2}:\\d{2} [A-Z]{3,4} \\d{4}\\b", "DATESTAMP_OTHER", ""), + "Tue May 15 16:14:56 UTC 2018", 1526400896000L); + checkAndValidateDateFormat(new TimestampMatch(15, "", "EEE MMM dd HH:mm zzz YYYY", + "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{2}:\\d{2} [A-Z]{3,4} \\d{4}\\b", "DATESTAMP_OTHER", ""), + "Tue May 15 16:14 UTC 2018", 1526400840000L); + + checkAndValidateDateFormat(new TimestampMatch(16, "", "YYYYMMddHHmmss", "\\b\\d{14}\\b", "DATESTAMP_EVENTLOG", ""), + "20180515171456", 1526400896000L); + + 
checkAndValidateDateFormat(new TimestampMatch(17, "", "EEE MMM dd HH:mm:ss YYYY", + "\\b[A-Z]\\S{2,8} [A-Z]\\S{2,8} \\d{1,2} \\d{2}:\\d{2}:\\d{2} \\d{4}\\b", "HTTPDERROR_DATE", ""), + "Tue May 15 17:14:56 2018", 1526400896000L); + + checkAndValidateDateFormat(new TimestampMatch(18, "", Arrays.asList("MMM dd HH:mm:ss.SSS", "MMM d HH:mm:ss.SSS"), + "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}", "SYSLOGTIMESTAMP", ""), "May 15 17:14:56.725", 1526400896725L); + checkAndValidateDateFormat(new TimestampMatch(19, "", Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"), + "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP", ""), "May 15 17:14:56", 1526400896000L); + + checkAndValidateDateFormat(new TimestampMatch(20, "", "dd/MMM/YYYY:HH:mm:ss Z", + "\\b\\d{2}/[A-Z]\\S{2}/\\d{4}:\\d{2}:\\d{2}:\\d{2} ", "HTTPDATE", ""), "15/May/2018:17:14:56 +0100", 1526400896000L); + + checkAndValidateDateFormat(new TimestampMatch(21, "", "MMM dd, YYYY K:mm:ss a", + "\\b[A-Z]\\S{2,8} \\d{1,2}, \\d{4} \\d{1,2}:\\d{2}:\\d{2} [AP]M\\b", "CATALINA_DATESTAMP", ""), "May 15, 2018 5:14:56 PM", + 1526400896000L); + + checkAndValidateDateFormat(new TimestampMatch(22, "", Arrays.asList("MMM dd YYYY HH:mm:ss", "MMM d YYYY HH:mm:ss"), + "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{4} \\d{2}:\\d{2}:\\d{2}\\b", "CISCOTIMESTAMP", ""), "May 15 2018 17:14:56", + 1526400896000L); + } + + public void testFindFirstMatchGivenOnlySystemDate() { + + assertEquals(new TimestampMatch(23, "", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""), + TimestampFormatFinder.findFirstMatch("1526400896374")); + assertEquals(new TimestampMatch(23, "", "UNIX_MS", "\\b\\d{13}\\b", "POSINT", ""), + TimestampFormatFinder.findFirstFullMatch("1526400896374")); + + assertEquals(new TimestampMatch(24, "", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""), + TimestampFormatFinder.findFirstMatch("1526400896.736")); + assertEquals(new TimestampMatch(24, "", "UNIX", "\\b\\d{10}\\.\\d{3,9}\\b", "NUMBER", ""), + TimestampFormatFinder.findFirstFullMatch("1526400896.736")); + assertEquals(new TimestampMatch(25, "", "UNIX", "\\b\\d{10}\\b", "POSINT", ""), + TimestampFormatFinder.findFirstMatch("1526400896")); + assertEquals(new TimestampMatch(25, "", "UNIX", "\\b\\d{10}\\b", "POSINT", ""), + TimestampFormatFinder.findFirstFullMatch("1526400896")); + + assertEquals(new TimestampMatch(26, "", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""), + TimestampFormatFinder.findFirstMatch("400000005afb159a164ac980")); + assertEquals(new TimestampMatch(26, "", "TAI64N", "\\b[0-9A-Fa-f]{24}\\b", "BASE16NUM", ""), + TimestampFormatFinder.findFirstFullMatch("400000005afb159a164ac980")); + } + + private void checkAndValidateDateFormat(TimestampMatch expected, String text, long expectedEpochMs) { + + assertEquals(expected, TimestampFormatFinder.findFirstMatch(text)); + assertEquals(expected, TimestampFormatFinder.findFirstFullMatch(text)); + + // All the test times are for Tue May 15 2018 16:14:56 UTC, which is 17:14:56 in London + DateTimeZone zone = DateTimeZone.forID("Europe/London"); + DateTime parsed; + for (int i = 0; i < expected.dateFormats.size(); ++i) { + try { + String dateFormat = expected.dateFormats.get(i); + switch (dateFormat) { + case "ISO8601": + parsed = ISODateTimeFormat.dateTimeParser().withZone(zone).withDefaultYear(2018).parseDateTime(text); + break; + default: + DateTimeFormatter parser = DateTimeFormat.forPattern(dateFormat).withZone(zone).withLocale(Locale.UK); + parsed = parser.withDefaultYear(2018).parseDateTime(text); + break; + } 
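+ // A TimestampMatch may carry several candidate date formats (e.g. "MMM dd HH:mm:ss" and "MMM d HH:mm:ss"); each is tried in turn, and a parse failure or an unexpected epoch value only fails the test on the last candidate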
+ if (expectedEpochMs == parsed.getMillis()) { + break; + } + // If the last one isn't right then propagate + if (i == expected.dateFormats.size() - 1) { + assertEquals(expectedEpochMs, parsed.getMillis()); + } + } catch (RuntimeException e) { + // If the last one throws then propagate + if (i == expected.dateFormats.size() - 1) { + throw e; + } + } + } + assertTrue(expected.simplePattern.matcher(text).find()); + } + + public void testFindFirstMatchGivenRealLogMessages() { + + assertEquals(new TimestampMatch(7, "[", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", "TIMESTAMP_ISO8601", + "][INFO ][o.e.e.NodeEnvironment ] [node-0] heap size [3.9gb], compressed ordinary object pointers [true]"), + TimestampFormatFinder.findFirstMatch("[2018-05-11T17:07:29,553][INFO ][o.e.e.NodeEnvironment ] [node-0] " + + "heap size [3.9gb], compressed ordinary object pointers [true]")); + + assertEquals(new TimestampMatch(20, "192.168.62.101 - - [", "dd/MMM/YYYY:HH:mm:ss Z", + "\\b\\d{2}/[A-Z]\\S{2}/\\d{4}:\\d{2}:\\d{2}:\\d{2} ", "HTTPDATE", + "] \"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384"), + TimestampFormatFinder.findFirstMatch("192.168.62.101 - - [29/Jun/2016:12:11:31 +0000] " + + "\"POST //apiserv:8080/engine/v2/jobs HTTP/1.1\" 201 42 \"-\" \"curl/7.46.0\" 384")); + + assertEquals(new TimestampMatch(21, "", "MMM dd, YYYY K:mm:ss a", + "\\b[A-Z]\\S{2,8} \\d{1,2}, \\d{4} \\d{1,2}:\\d{2}:\\d{2} [AP]M\\b", "CATALINA_DATESTAMP", + " org.apache.tomcat.util.http.Parameters processParameters"), + TimestampFormatFinder.findFirstMatch("Aug 29, 2009 12:03:57 AM org.apache.tomcat.util.http.Parameters processParameters")); + + assertEquals(new TimestampMatch(19, "", Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"), + "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP", " esxi1.acme.com Vpxa: " + + "[3CB3FB90 verbose 'vpxavpxaInvtVm' opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed"), + TimestampFormatFinder.findFirstMatch("Oct 19 17:04:44 esxi1.acme.com Vpxa: [3CB3FB90 verbose 'vpxavpxaInvtVm' " + + "opID=WFU-33d82c31] [VpxaInvtVmChangeListener] Guest DiskInfo Changed")); + + assertEquals(new TimestampMatch(7, "559550912540598297\t", "ISO8601", "\\b\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", + "TIMESTAMP_ISO8601", + "\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp"), + TimestampFormatFinder.findFirstMatch("559550912540598297\t2016-04-20T14:06:53\t2016-04-20T21:06:53Z\t38545844\tserv02nw07\t" + + "192.168.114.28\tAuthpriv\tInfo\tsshd\tsubsystem request for sftp")); + + assertEquals(new TimestampMatch(19, "", Arrays.asList("MMM dd HH:mm:ss", "MMM d HH:mm:ss"), + "\\b[A-Z]\\S{2,8} {1,2}\\d{1,2} \\d{2}:\\d{2}:\\d{2}\\b", "SYSLOGTIMESTAMP", + " dnsserv named[22529]: error (unexpected RCODE REFUSED) resolving 'www.elastic.co/A/IN': 95.110.68.206#53"), + TimestampFormatFinder.findFirstMatch("Sep 8 11:55:35 dnsserv named[22529]: error (unexpected RCODE REFUSED) resolving " + + "'www.elastic.co/A/IN': 95.110.68.206#53")); + + assertEquals(new TimestampMatch(3, "", "YYYY-MM-dd HH:mm:ss.SSSSSS", "\\b\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}", + "TIMESTAMP_ISO8601", + "|INFO |VirtualServer |1 |client 'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client " + + "'User1'(id:2) in channel '3er Instanz'(id:2)"), + TimestampFormatFinder.findFirstMatch("2018-01-06 19:22:20.106822|INFO |VirtualServer |1 |client " + + " 'User1'(id:2) was added to 
channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel '3er Instanz'(id:2)")); + } + + public void testInterpretFractionalSeconds() { + assertEquals(new Tuple<>(',', 0), TimestampFormatFinder.interpretFractionalSeconds("Sep 8 11:55:35")); + assertEquals(new Tuple<>(',', 0), TimestampFormatFinder.interpretFractionalSeconds("29/Jun/2016:12:11:31 +0000")); + assertEquals(new Tuple<>('.', 6), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06 17:21:25.764368")); + assertEquals(new Tuple<>(',', 9), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06T17:21:25,764363438")); + assertEquals(new Tuple<>(',', 3), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06T17:21:25,764")); + assertEquals(new Tuple<>('.', 3), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06T17:21:25.764")); + assertEquals(new Tuple<>('.', 6), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06 17:21:25.764368Z")); + assertEquals(new Tuple<>(',', 9), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06T17:21:25,764363438Z")); + assertEquals(new Tuple<>(',', 3), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06T17:21:25,764Z")); + assertEquals(new Tuple<>('.', 3), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06T17:21:25.764Z")); + assertEquals(new Tuple<>('.', 6), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06 17:21:25.764368 Z")); + assertEquals(new Tuple<>(',', 9), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06T17:21:25,764363438 Z")); + assertEquals(new Tuple<>(',', 3), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06T17:21:25,764 Z")); + assertEquals(new Tuple<>('.', 3), TimestampFormatFinder.interpretFractionalSeconds("2018-01-06T17:21:25.764 Z")); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TsvLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TsvLogStructureFinderFactoryTests.java new file mode 100644 index 0000000000000..1c8acc14d3288 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/TsvLogStructureFinderFactoryTests.java @@ -0,0 +1,33 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. 
+ */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +public class TsvLogStructureFinderFactoryTests extends LogStructureTestCase { + + private LogStructureFinderFactory factory = new TsvLogStructureFinderFactory(); + + // No need to check JSON, XML or CSV because they come earlier in the order we check formats + + public void testCanCreateFromSampleGivenTsv() { + + assertTrue(factory.canCreateFromSample(explanation, TSV_SAMPLE)); + } + + public void testCanCreateFromSampleGivenSemiColonSeparatedValues() { + + assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE)); + } + + public void testCanCreateFromSampleGivenPipeSeparatedValues() { + + assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE)); + } + + public void testCanCreateFromSampleGivenText() { + + assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE)); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderFactoryTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderFactoryTests.java new file mode 100644 index 0000000000000..27eb4ede040b0 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderFactoryTests.java @@ -0,0 +1,43 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +public class XmlLogStructureFinderFactoryTests extends LogStructureTestCase { + + private LogStructureFinderFactory factory = new XmlLogStructureFinderFactory(); + + // No need to check JSON because it comes earlier in the order we check formats + + public void testCanCreateFromSampleGivenXml() { + + assertTrue(factory.canCreateFromSample(explanation, XML_SAMPLE)); + } + + public void testCanCreateFromSampleGivenCsv() { + + assertFalse(factory.canCreateFromSample(explanation, CSV_SAMPLE)); + } + + public void testCanCreateFromSampleGivenTsv() { + + assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE)); + } + + public void testCanCreateFromSampleGivenSemiColonSeparatedValues() { + + assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE)); + } + + public void testCanCreateFromSampleGivenPipeSeparatedValues() { + + assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE)); + } + + public void testCanCreateFromSampleGivenText() { + + assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE)); + } +} diff --git a/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderTests.java b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderTests.java new file mode 100644 index 0000000000000..0d04df152ef00 --- /dev/null +++ b/x-pack/plugin/ml/log-structure-finder/src/test/java/org/elasticsearch/xpack/ml/logstructurefinder/XmlLogStructureFinderTests.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +package org.elasticsearch.xpack.ml.logstructurefinder; + +import java.util.Collections; + +public class XmlLogStructureFinderTests extends LogStructureTestCase { + + private LogStructureFinderFactory factory = new XmlLogStructureFinderFactory(); + + public void testCreateConfigsGivenGoodXml() throws Exception { + assertTrue(factory.canCreateFromSample(explanation, XML_SAMPLE)); + + String charset = randomFrom(POSSIBLE_CHARSETS); + Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); + LogStructureFinder structureFinder = factory.createFromSample(explanation, XML_SAMPLE, charset, hasByteOrderMarker); + + LogStructure structure = structureFinder.getStructure(); + + assertEquals(LogStructure.Format.XML, structure.getFormat()); + assertEquals(charset, structure.getCharset()); + if (hasByteOrderMarker == null) { + assertNull(structure.getHasByteOrderMarker()); + } else { + assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker()); + } + assertNull(structure.getExcludeLinesPattern()); + assertEquals("^\\s*<log4j:event", structure.getMultilineStartPattern()); [...] } +} diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/ingest/SetSecurityUserProcessorFactoryTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/ingest/SetSecurityUserProcessorFactoryTests.java [...] Map<String, Object> config = new HashMap<>(); ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> factory.create(null, "_tag", config)); - assertThat(e.getHeader("property_name").get(0), equalTo("field")); - assertThat(e.getHeader("processor_type").get(0), equalTo(SetSecurityUserProcessor.TYPE)); - assertThat(e.getHeader("processor_tag").get(0), equalTo("_tag")); + assertThat(e.getMetadata("es.property_name").get(0), equalTo("field")); + assertThat(e.getMetadata("es.processor_type").get(0), equalTo(SetSecurityUserProcessor.TYPE)); + assertThat(e.getMetadata("es.processor_tag").get(0), equalTo("_tag")); } public void testProcessor_validProperties() throws Exception { @@ -52,9 +52,9 @@ public void testProcessor_invalidProperties() throws Exception { config.put("field", "_field"); config.put("properties", Arrays.asList("invalid")); ElasticsearchParseException e = expectThrows(ElasticsearchParseException.class, () -> factory.create(null, "_tag", config)); - assertThat(e.getHeader("property_name").get(0), equalTo("properties")); - assertThat(e.getHeader("processor_type").get(0), equalTo(SetSecurityUserProcessor.TYPE)); - assertThat(e.getHeader("processor_tag").get(0), equalTo("_tag")); + assertThat(e.getMetadata("es.property_name").get(0), equalTo("properties")); + assertThat(e.getMetadata("es.processor_type").get(0), equalTo(SetSecurityUserProcessor.TYPE)); + assertThat(e.getMetadata("es.processor_tag").get(0), equalTo("_tag")); } } diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/DeleteJobRequest.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/DeleteJobRequest.java new file mode 100644 index 0000000000000..1b7450de0929c --- /dev/null +++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/DeleteJobRequest.java @@ -0,0 +1,75 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.protocol.xpack.ml; + +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionRequestValidationException; + +import java.util.Objects; + +public class DeleteJobRequest extends ActionRequest { + + private String jobId; + private boolean force; + + public DeleteJobRequest(String jobId) { + this.jobId = Objects.requireNonNull(jobId, "[job_id] must not be null"); + } + + public String getJobId() { + return jobId; + } + + public void setJobId(String jobId) { + this.jobId = Objects.requireNonNull(jobId, "[job_id] must not be null"); + } + + public boolean isForce() { + return force; + } + + public void setForce(boolean force) { + this.force = force; + } + + @Override + public ActionRequestValidationException validate() { + return null; + } + + @Override + public int hashCode() { + return Objects.hash(jobId, force); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + + if (obj == null || obj.getClass() != getClass()) { + return false; + } + + DeleteJobRequest other = (DeleteJobRequest) obj; + return Objects.equals(jobId, other.jobId) && Objects.equals(force, other.force); + } + +} diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/DeleteJobResponse.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/DeleteJobResponse.java new file mode 100644 index 0000000000000..0b4faa38f545f --- /dev/null +++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/DeleteJobResponse.java @@ -0,0 +1,60 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.protocol.xpack.ml; + +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Objects; + +public class DeleteJobResponse extends AcknowledgedResponse { + + public DeleteJobResponse(boolean acknowledged) { + super(acknowledged); + } + + public DeleteJobResponse() { + } + + public static DeleteJobResponse fromXContent(XContentParser parser) throws IOException { + AcknowledgedResponse response = AcknowledgedResponse.fromXContent(parser); + return new DeleteJobResponse(response.isAcknowledged()); + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + + if (other == null || getClass() != other.getClass()) { + return false; + } + + DeleteJobResponse that = (DeleteJobResponse) other; + return isAcknowledged() == that.isAcknowledged(); + } + + @Override + public int hashCode() { + return Objects.hash(isAcknowledged()); + } + +} diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/OpenJobRequest.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/OpenJobRequest.java new file mode 100644 index 0000000000000..a18a18bb55a14 --- /dev/null +++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/OpenJobRequest.java @@ -0,0 +1,113 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.protocol.xpack.ml; + +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.protocol.xpack.ml.job.config.Job; + +import java.io.IOException; +import java.util.Objects; + +public class OpenJobRequest extends ActionRequest implements ToXContentObject { + + public static final ParseField TIMEOUT = new ParseField("timeout"); + public static final ConstructingObjectParser<OpenJobRequest, Void> PARSER = new ConstructingObjectParser<>( + "open_job_request", true, a -> new OpenJobRequest((String) a[0])); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), Job.ID); + PARSER.declareString((request, val) -> request.setTimeout(TimeValue.parseTimeValue(val, TIMEOUT.getPreferredName())), TIMEOUT); + } + + public static OpenJobRequest fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + + private String jobId; + private TimeValue timeout; + + public OpenJobRequest(String jobId) { + this.jobId = Objects.requireNonNull(jobId, "[job_id] must not be null"); + } + + public String getJobId() { + return jobId; + } + + public void setJobId(String jobId) { + this.jobId = Objects.requireNonNull(jobId, "[job_id] must not be null"); + } + + public TimeValue getTimeout() { + return timeout; + } + + public void setTimeout(TimeValue timeout) { + this.timeout = timeout; + } + + @Override + public ActionRequestValidationException validate() { + return null; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + builder.startObject(); + builder.field(Job.ID.getPreferredName(), jobId); + if (timeout != null) { + builder.field(TIMEOUT.getPreferredName(), timeout.getStringRep()); + } + builder.endObject(); + return builder; + } + + @Override + public String toString() { + return Strings.toString(this); + } + + @Override + public int hashCode() { + return Objects.hash(jobId, timeout); + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + + if (other == null || getClass() != other.getClass()) { + return false; + } + + OpenJobRequest that = (OpenJobRequest) other; + return Objects.equals(jobId, that.jobId) && Objects.equals(timeout, that.timeout); + } +} diff --git a/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/OpenJobResponse.java b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/OpenJobResponse.java new file mode 100644 index 0000000000000..d8850ddbbe3a8 --- /dev/null +++ b/x-pack/protocol/src/main/java/org/elasticsearch/protocol/xpack/ml/OpenJobResponse.java @@ -0,0 +1,88 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.protocol.xpack.ml; + +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Objects; + +public class OpenJobResponse extends ActionResponse implements ToXContentObject { + + private static final ParseField OPENED = new ParseField("opened"); + + public static final ObjectParser<OpenJobResponse, Void> PARSER = new ObjectParser<>("open_job_response", true, OpenJobResponse::new); + + static { + PARSER.declareBoolean(OpenJobResponse::setOpened, OPENED); + } + + private boolean opened; + + OpenJobResponse() { + } + + public OpenJobResponse(boolean opened) { + this.opened = opened; + } + + public static OpenJobResponse fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + + public boolean isOpened() { + return opened; + } + + public void setOpened(boolean opened) { + this.opened = opened; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + + if (other == null || getClass() != other.getClass()) { + return false; + } + + OpenJobResponse that = (OpenJobResponse) other; + return isOpened() == that.isOpened(); + } + + @Override + public int hashCode() { + return Objects.hash(isOpened()); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(OPENED.getPreferredName(), opened); + builder.endObject(); + return builder; + } +} diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/DeleteJobRequestTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/DeleteJobRequestTests.java new file mode 100644 index 0000000000000..fb8a38fa0c68e --- /dev/null +++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/DeleteJobRequestTests.java @@ -0,0 +1,45 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.elasticsearch.protocol.xpack.ml; + +import org.elasticsearch.protocol.xpack.ml.job.config.JobTests; +import org.elasticsearch.test.ESTestCase; + +public class DeleteJobRequestTests extends ESTestCase { + + private DeleteJobRequest createTestInstance() { + return new DeleteJobRequest(JobTests.randomValidJobId()); + } + + public void test_WithNullJobId() { + NullPointerException ex = expectThrows(NullPointerException.class, () -> new DeleteJobRequest(null)); + assertEquals("[job_id] must not be null", ex.getMessage()); + + ex = expectThrows(NullPointerException.class, () -> createTestInstance().setJobId(null)); + assertEquals("[job_id] must not be null", ex.getMessage()); + } + + public void test_WithForce() { + DeleteJobRequest deleteJobRequest = createTestInstance(); + assertFalse(deleteJobRequest.isForce()); + + deleteJobRequest.setForce(true); + assertTrue(deleteJobRequest.isForce()); + } +} diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/DeleteJobResponseTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/DeleteJobResponseTests.java new file mode 100644 index 0000000000000..a73179a08983d --- /dev/null +++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/DeleteJobResponseTests.java @@ -0,0 +1,42 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.protocol.xpack.ml; + +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractXContentTestCase; + +import java.io.IOException; + +public class DeleteJobResponseTests extends AbstractXContentTestCase<DeleteJobResponse> { + + @Override + protected DeleteJobResponse createTestInstance() { + return new DeleteJobResponse(); + } + + @Override + protected DeleteJobResponse doParseInstance(XContentParser parser) throws IOException { + return DeleteJobResponse.fromXContent(parser); + } + + @Override + protected boolean supportsUnknownFields() { + return false; + } +} diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/OpenJobRequestTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/OpenJobRequestTests.java new file mode 100644 index 0000000000000..242f0cf4e8a5a --- /dev/null +++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/OpenJobRequestTests.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.protocol.xpack.ml; + +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.protocol.xpack.ml.job.config.JobTests; +import org.elasticsearch.test.AbstractXContentTestCase; + +import java.io.IOException; + +public class OpenJobRequestTests extends AbstractXContentTestCase<OpenJobRequest> { + + @Override + protected OpenJobRequest createTestInstance() { + OpenJobRequest openJobRequest = new OpenJobRequest(JobTests.randomValidJobId()); + if (randomBoolean()) { + openJobRequest.setTimeout(TimeValue.timeValueSeconds(randomIntBetween(1, Integer.MAX_VALUE))); + } + return openJobRequest; + } + + @Override + protected OpenJobRequest doParseInstance(XContentParser parser) throws IOException { + return OpenJobRequest.fromXContent(parser); + } + + @Override + protected boolean supportsUnknownFields() { + return true; + } +} diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/OpenJobResponseTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/OpenJobResponseTests.java new file mode 100644 index 0000000000000..aadfb236d3a9b --- /dev/null +++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/OpenJobResponseTests.java @@ -0,0 +1,42 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ +package org.elasticsearch.protocol.xpack.ml; + +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.test.AbstractXContentTestCase; + +import java.io.IOException; + +public class OpenJobResponseTests extends AbstractXContentTestCase<OpenJobResponse> { + + @Override + protected OpenJobResponse createTestInstance() { + return new OpenJobResponse(randomBoolean()); + } + + @Override + protected OpenJobResponse doParseInstance(XContentParser parser) throws IOException { + return OpenJobResponse.fromXContent(parser); + } + + @Override + protected boolean supportsUnknownFields() { + return false; + } +} diff --git a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/PutJobRequestTests.java b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/PutJobRequestTests.java index 448c40a4d2fa1..165934224b905 --- a/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/PutJobRequestTests.java +++ b/x-pack/protocol/src/test/java/org/elasticsearch/protocol/xpack/ml/PutJobRequestTests.java @@ -23,7 +23,6 @@ import org.elasticsearch.protocol.xpack.ml.job.config.JobTests; import org.elasticsearch.test.AbstractXContentTestCase; -import java.io.IOException; public class PutJobRequestTests extends AbstractXContentTestCase<PutJobRequest> { @@ -33,7 +32,7 @@ protected PutJobRequest createTestInstance() { } @Override - protected PutJobRequest doParseInstance(XContentParser parser) throws IOException { + protected PutJobRequest doParseInstance(XContentParser parser) { return new PutJobRequest(Job.PARSER.apply(parser, null).build()); } diff --git a/x-pack/qa/full-cluster-restart/src/test/java/org/elasticsearch/xpack/restart/FullClusterRestartIT.java b/x-pack/qa/full-cluster-restart/src/test/java/org/elasticsearch/xpack/restart/FullClusterRestartIT.java index 24303b8342b7e..6ead87aba6103 --- a/x-pack/qa/full-cluster-restart/src/test/java/org/elasticsearch/xpack/restart/FullClusterRestartIT.java +++ b/x-pack/qa/full-cluster-restart/src/test/java/org/elasticsearch/xpack/restart/FullClusterRestartIT.java @@ -325,6 +325,7 @@ public void testRollupAfterRestart() throws Exception { } } + @AwaitsFix(bugUrl="https://github.com/elastic/elasticsearch/issues/32773") public void testRollupIDSchemeAfterRestart() throws Exception { assumeTrue("Rollup can be tested with 6.3.0 and onwards", oldClusterVersion.onOrAfter(Version.V_6_3_0)); assumeTrue("Rollup ID scheme changed in 6.4", oldClusterVersion.before(Version.V_6_4_0)); diff --git a/x-pack/qa/security-example-spi-extension/src/main/java/org/elasticsearch/example/realm/CustomRealm.java b/x-pack/qa/security-example-spi-extension/src/main/java/org/elasticsearch/example/realm/CustomRealm.java index af3fb160e133f..c6502c05d252f --- a/x-pack/qa/security-example-spi-extension/src/main/java/org/elasticsearch/example/realm/CustomRealm.java +++ b/x-pack/qa/security-example-spi-extension/src/main/java/org/elasticsearch/example/realm/CustomRealm.java @@ -12,7 +12,7 @@ import org.elasticsearch.xpack.core.security.authc.AuthenticationToken; import org.elasticsearch.xpack.core.security.authc.Realm; import org.elasticsearch.xpack.core.security.authc.RealmConfig; -import org.elasticsearch.xpack.core.security.authc.support.CharArrays; +import org.elasticsearch.common.CharArrays; import org.elasticsearch.xpack.core.security.authc.support.UsernamePasswordToken; import org.elasticsearch.protocol.xpack.security.User;
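For orientation, here is a minimal usage sketch of the request and response classes this change introduces. It is not part of the diff itself; the job id "my-job", the timeout value, and the wrapper class are illustrative only.

import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.protocol.xpack.ml.DeleteJobRequest;
import org.elasticsearch.protocol.xpack.ml.OpenJobRequest;

public class MlRequestExamples {
    public static void main(String[] args) {
        // Build a delete request; setting force is intended for deleting a job
        // without requiring it to be closed first
        DeleteJobRequest deleteRequest = new DeleteJobRequest("my-job");
        deleteRequest.setForce(true);

        // Build an open request, asking the server to wait up to two minutes
        // for the job to open before the request times out
        OpenJobRequest openRequest = new OpenJobRequest("my-job");
        openRequest.setTimeout(TimeValue.timeValueMinutes(2));

        // OpenJobRequest implements ToXContentObject, so toString() renders the request body JSON
        System.out.println(openRequest); // {"job_id":"my-job","timeout":"2m"}
    }
}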